diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.c new file mode 100644 index 0000000..1f75093 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.c @@ -0,0 +1,123 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u32_2 t0, t1; + u64 tmp0, tmp1; + u64 i; + + while (len >= RATE) { + tmp0 = U64BIG(*(u64*)in); + t0 = to_bit_interleaving(tmp0); + s->x0.e ^= t0.e; + s->x0.o ^= t0.o; + tmp1 = U64BIG(*(u64*)(in + 8)); + t1 = to_bit_interleaving(tmp1); + s->x1.e ^= t1.e; + s->x1.o ^= t1.o; + if (mode != ASCON_AD) { + tmp0 = from_bit_interleaving(s->x0); + *(u64*)out = U64BIG(tmp0); + tmp1 = from_bit_interleaving(s->x1); + *(u64*)(out + 8) = U64BIG(tmp1); + } + if (mode == ASCON_DEC) { + s->x0 = t0; + s->x1 = t1; + } + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + tmp0 = 0; + tmp1 = 0; + for (i = 0; i < len; ++i, ++in) + if (i < 8) + tmp0 ^= INS_BYTE64(*in, i); + else + tmp1 ^= INS_BYTE64(*in, i % 8); + in -= len; + if (len < 8) + tmp0 ^= INS_BYTE64(0x80, len); + else + tmp1 ^= INS_BYTE64(0x80, len % 8); + t0 = to_bit_interleaving(tmp0); + s->x0.e ^= t0.e; + s->x0.o ^= t0.o; + t1 = to_bit_interleaving(tmp1); + s->x1.e ^= t1.e; + s->x1.o ^= t1.o; + if (mode != ASCON_AD) { + tmp0 = from_bit_interleaving(s->x0); + tmp1 = from_bit_interleaving(s->x1); + for (i = 0; i < len; ++i, ++out) + if (i < 8) + *out = EXT_BYTE64(tmp0, i); + else + *out = EXT_BYTE64(tmp1, i % 8); + } + if (mode == ASCON_DEC) { + for (i = 0; i < len; ++i, ++in) + if (i < 8) { + tmp0 &= ~INS_BYTE64(0xff, i); + tmp0 |= INS_BYTE64(*in, i); + } else { + tmp1 &= ~INS_BYTE64(0xff, i % 8); + tmp1 |= INS_BYTE64(*in, i % 8); + } + s->x0 = to_bit_interleaving(tmp0); + s->x1 = to_bit_interleaving(tmp1); + } +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + u32_2 K0, K1, N0, N1; + + // load key and nonce + K0 = to_bit_interleaving(U64BIG(*(u64*)k)); + K1 = to_bit_interleaving(U64BIG(*(u64*)(k + 8))); + N0 = to_bit_interleaving(U64BIG(*(u64*)npub)); + N1 = to_bit_interleaving(U64BIG(*(u64*)(npub + 8))); + + // initialization + s->x0 = to_bit_interleaving(IV); + s->x1.o = K0.o; + s->x1.e = K0.e; + s->x2.e = K1.e; + s->x2.o = K1.o; + s->x3.e = N0.e; + s->x3.o = N0.o; + s->x4.e = N1.e; + s->x4.o = N1.o; + P(s, PA_ROUNDS); + s->x3.e ^= K0.e; + s->x3.o ^= K0.o; + s->x4.e ^= K1.e; + s->x4.o ^= K1.o; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4.e ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x2.e ^= K0.e; + s->x2.o ^= K0.o; + s->x3.e ^= K1.e; + s->x3.o ^= K1.o; + P(s, PA_ROUNDS); + s->x3.e ^= K0.e; + s->x3.o ^= K0.o; + s->x4.e ^= K1.e; + s->x4.o ^= K1.o; +} diff --git 
a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.h new file mode 100644 index 0000000..4a5330f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (128 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif // CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c new file mode 100644 index 0000000..7e9dd1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c @@ -0,0 +1,32 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + u32_2 t0, t1; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen))); + t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8))); + if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) != + 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c new file mode 100644 index 0000000..b5dc587 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c @@ -0,0 +1,24 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + u64 tmp0, tmp1; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + tmp0 = from_bit_interleaving(s.x3); + *(u64*)(c + mlen) = U64BIG(tmp0); + tmp1 = from_bit_interleaving(s.x4); + *(u64*)(c + mlen + 8) = U64BIG(tmp1); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) 
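+// (no-op conversions: a big-endian target already stores words in the byte order the spec uses)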
+#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c new file mode 100644 index 0000000..bc47f5f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c @@ -0,0 +1,50 @@ +#include "permutations.h" + +static const u8 constants[][2] = { + {0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc}, + {0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 +u32_2 to_bit_interleaving(u64 in) { + u32 hi = (in) >> 32; + u32 lo = (u32)(in); + u32 r0, r1; + u32_2 out; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + out.e = (lo & 0x0000FFFF) | (hi << 16); + out.o = (lo >> 16) | (hi & 0xFFFF0000); + return out; +} + +// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 +u64 from_bit_interleaving(u32_2 in) { + u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); + u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); + u32 r0, r1; + u64 out; + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + out = (u64)hi << 32 | lo; + return out; +} + +void P(state *p, u8 rounds) { + state s = *p; + u32_2 t0, t1, t2, t3, t4; + u32 i, start = START_ROUND(rounds); + for (i = start; i < 12; i++) ROUND(constants[i][0], constants[i][1]); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h new file mode 100644 index 0000000..bc643ce --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h @@ -0,0 +1,71 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +typedef struct { + u32 e; + u32 o; +} u32_2; + +typedef struct { + u32_2 x0; + u32_2 x1; + u32_2 x2; + u32_2 x3; + u32_2 x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define START_ROUND(x) (12 - (x)) + +// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 +u32_2 to_bit_interleaving(u64 in); + +// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 +u64 from_bit_interleaving(u32_2 in); + +/* clang-format off */ +#define ROUND(C_e, C_o) \ + do { \ + /* round constant */ \ + s.x2.e ^= C_e; s.x2.o ^= C_o; \ + /* s-box layer */ \ + s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ + s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \ + s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \ + t0.e = s.x0.e; t0.o = s.x0.o; \ + t4.e = s.x4.e; t4.o = s.x4.o; \ + t3.e = s.x3.e; t3.o = s.x3.o; \ + t1.e = s.x1.e; t1.o = s.x1.o; \ + t2.e = s.x2.e; t2.o = s.x2.o; \ + s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \ + s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \ + s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \ + s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \ + s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \ + s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \ + s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \ + s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ + /* linear layer */ \ + t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \ + t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \ + t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \ + t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \ + t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \ + s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \ + s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \ + s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \ + s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \ + s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \ + s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \ + } while(0) +/* clang-format on */ + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.c new file mode 100644 index 0000000..676f436 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.c @@ -0,0 +1,78 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u64* x; + u64 i; + + while (len >= RATE) { + s->x0 ^= U64BIG(*(u64*)in); + s->x1 ^= U64BIG(*(u64*)(in + 8)); + if (mode != ASCON_AD) { + *(u64*)out = U64BIG(s->x0); + *(u64*)(out + 8) = U64BIG(s->x1); + } + if (mode == ASCON_DEC) { + s->x0 = U64BIG(*((u64*)in)); + s->x1 = U64BIG(*((u64*)(in + 8))); + } + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + for (i = 0; i < len; ++i, ++out, ++in) { + if (i < 8) + x = &(s->x0); + else + x = &(s->x1); + *x ^= INS_BYTE64(*in, i % 8); + if (mode != ASCON_AD) *out = EXT_BYTE64(*x, i % 8); + if (mode == ASCON_DEC) { + *x &= ~INS_BYTE64(0xff, i % 8); + *x |= INS_BYTE64(*in, i % 8); + } + } + if (len < 8) + s->x0 ^= INS_BYTE64(0x80, len); + else + s->x1 ^= INS_BYTE64(0x80, len % 8); +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned 
long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + const u64 K0 = U64BIG(*(u64*)k); + const u64 K1 = U64BIG(*(u64*)(k + 8)); + const u64 N0 = U64BIG(*(u64*)npub); + const u64 N1 = U64BIG(*(u64*)(npub + 8)); + + // initialization + s->x0 = IV; + s->x1 = K0; + s->x2 = K1; + s->x3 = N0; + s->x4 = N1; + P(s, PA_ROUNDS); + s->x3 ^= K0; + s->x4 ^= K1; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4 ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x2 ^= K0; + s->x3 ^= K1; + P(s, PA_ROUNDS); + s->x3 ^= K0; + s->x4 ^= K1; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.h new file mode 100644 index 0000000..4a5330f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (128 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif // CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c new file mode 100644 index 0000000..0cde81e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c @@ -0,0 +1,29 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | + (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c new file mode 100644 index 0000000..5961c60 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c @@ -0,0 +1,21 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + *(u64*)(c + mlen) = U64BIG(s.x3); + *(u64*)(c + mlen + 8) = U64BIG(s.x4); + + return 0; +} 
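
Taken together, core.c, encrypt.c and decrypt.c above implement the SUPERCOP/NIST-LWC crypto_aead interface for Ascon-128a: encryption appends the 16-byte tag to the ciphertext, and decryption recomputes and checks the tag before reporting success. The following round-trip harness is a sketch only -- it assumes the files above are compiled into the same program, and the key, nonce and message values are purely illustrative:

#include <stdio.h>
#include <string.h>

/* prototypes exactly as defined in encrypt.c/decrypt.c above */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
                        const unsigned char *m, unsigned long long mlen,
                        const unsigned char *ad, unsigned long long adlen,
                        const unsigned char *nsec, const unsigned char *npub,
                        const unsigned char *k);
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
                        unsigned char *nsec, const unsigned char *c,
                        unsigned long long clen, const unsigned char *ad,
                        unsigned long long adlen, const unsigned char *npub,
                        const unsigned char *k);

int main(void) {
  unsigned char k[16] = {0}, npub[16] = {0};   /* demo key and nonce only */
  unsigned char m[8] = "ascon", ad[4] = "hdr"; /* demo message and AD */
  unsigned char c[8 + 16], out[8];             /* ciphertext = mlen + 16-byte tag */
  unsigned long long clen, mlen;

  crypto_aead_encrypt(c, &clen, m, sizeof m, ad, sizeof ad, 0, npub, k);
  if (crypto_aead_decrypt(out, &mlen, 0, c, clen, ad, sizeof ad, npub, k) != 0)
    return puts("tag mismatch"), 1;
  return memcmp(out, m, (size_t)mlen) != 0;    /* exit code 0 on success */
}

Flipping any bit of c (including the tag bytes) before decrypting makes crypto_aead_decrypt return -1 with mlen zeroed, which is the only failure signal this API provides.
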
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c new file mode 100644 index 0000000..9aaf9d1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c @@ -0,0 +1,8 @@ +#include "permutations.h" + +void P(state *p, u8 rounds) { + state s = *p; + u8 i, start = START_CONSTANT(rounds); + for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h new file mode 100644 index 0000000..7143e82 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h @@ -0,0 +1,66 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned long long u64; + +typedef struct { + u64 x0, x1, x2, x3, x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) + +#define ROUND(C) \ + do { \ + state t; \ + s.x2 ^= C; \ + s.x0 ^= s.x4; \ + s.x4 ^= s.x3; \ + s.x2 ^= s.x1; \ + t.x0 = s.x0; \ + t.x4 = s.x4; \ + t.x3 = s.x3; \ + t.x1 = s.x1; \ + t.x2 = s.x2; \ + s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ + s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ + s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ + s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ + s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ + s.x1 ^= s.x0; \ + t.x1 = s.x1; \ + s.x1 = ROTR64(s.x1, 39); \ + s.x3 ^= s.x2; \ + t.x2 = s.x2; \ + s.x2 = ROTR64(s.x2, 1); \ + t.x4 = s.x4; \ + t.x2 ^= s.x2; \ + s.x2 = ROTR64(s.x2, 6 - 1); \ + t.x3 = s.x3; \ + t.x1 ^= s.x1; \ + s.x3 = ROTR64(s.x3, 10); \ + s.x0 ^= s.x4; \ + 
s.x4 = ROTR64(s.x4, 7); \ + t.x3 ^= s.x3; \ + s.x2 ^= t.x2; \ + s.x1 = ROTR64(s.x1, 61 - 39); \ + t.x0 = s.x0; \ + s.x2 = ~s.x2; \ + s.x3 = ROTR64(s.x3, 17 - 10); \ + t.x4 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 41 - 7); \ + s.x3 ^= t.x3; \ + s.x1 ^= t.x1; \ + s.x0 = ROTR64(s.x0, 19); \ + s.x4 ^= t.x4; \ + t.x0 ^= s.x0; \ + s.x0 = ROTR64(s.x0, 28 - 19); \ + s.x0 ^= t.x0; \ + } while (0) + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.c new file mode 100644 index 0000000..79db104 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.c @@ -0,0 +1,93 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u32_2 t0; + u64 tmp0; + u64 i; + + while (len >= RATE) { + tmp0 = U64BIG(*(u64*)in); + t0 = to_bit_interleaving(tmp0); + s->x0.e ^= t0.e; + s->x0.o ^= t0.o; + if (mode != ASCON_AD) { + tmp0 = from_bit_interleaving(s->x0); + *(u64*)out = U64BIG(tmp0); + } + if (mode == ASCON_DEC) s->x0 = t0; + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + tmp0 = 0; + for (i = 0; i < len; ++i, ++in) tmp0 |= INS_BYTE64(*in, i); + in -= len; + tmp0 |= INS_BYTE64(0x80, len); + t0 = to_bit_interleaving(tmp0); + s->x0.e ^= t0.e; + s->x0.o ^= t0.o; + if (mode != ASCON_AD) { + tmp0 = from_bit_interleaving(s->x0); + for (i = 0; i < len; ++i, ++out) *out = EXT_BYTE64(tmp0, i); + } + if (mode == ASCON_DEC) { + for (i = 0; i < len; ++i, ++in) { + tmp0 &= ~INS_BYTE64(0xff, i); + tmp0 |= INS_BYTE64(*in, i); + } + s->x0 = to_bit_interleaving(tmp0); + } +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + u32_2 K0, K1, N0, N1; + + // load key and nonce + K0 = to_bit_interleaving(U64BIG(*(u64*)k)); + K1 = to_bit_interleaving(U64BIG(*(u64*)(k + 8))); + N0 = to_bit_interleaving(U64BIG(*(u64*)npub)); + N1 = to_bit_interleaving(U64BIG(*(u64*)(npub + 8))); + + // initialization + s->x0 = to_bit_interleaving(IV); + s->x1.o = K0.o; + s->x1.e = K0.e; + s->x2.e = K1.e; + s->x2.o = K1.o; + s->x3.e = N0.e; + s->x3.o = N0.o; + s->x4.e = N1.e; + s->x4.o = N1.o; + P(s, PA_ROUNDS); + s->x3.e ^= K0.e; + s->x3.o ^= K0.o; + s->x4.e ^= K1.e; + s->x4.o ^= K1.o; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4.e ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x1.e ^= K0.e; + s->x1.o ^= K0.o; + s->x2.e ^= K1.e; + s->x2.o ^= K1.o; + P(s, PA_ROUNDS); + s->x3.e ^= K0.e; + s->x3.o ^= K0.o; + s->x4.e ^= K1.e; + s->x4.o ^= K1.o; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.h new file mode 100644 index 0000000..90076c1 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (64 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif // CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c new file mode 100644 index 0000000..7e9dd1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c @@ -0,0 +1,32 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + u32_2 t0, t1; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen))); + t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8))); + if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) != + 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c new file mode 100644 index 0000000..b5dc587 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c @@ -0,0 +1,24 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + u64 tmp0, tmp1; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + tmp0 = from_bit_interleaving(s.x3); + *(u64*)(c + mlen) = U64BIG(tmp0); + tmp1 = from_bit_interleaving(s.x4); + *(u64*)(c + mlen + 8) = U64BIG(tmp1); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + 
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c new file mode 100644 index 0000000..bc47f5f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c @@ -0,0 +1,50 @@ +#include "permutations.h" + +static const u8 constants[][2] = { + {0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc}, + {0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 +u32_2 to_bit_interleaving(u64 in) { + u32 hi = (in) >> 32; + u32 lo = (u32)(in); + u32 r0, r1; + u32_2 out; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + out.e = (lo & 0x0000FFFF) | (hi << 16); + out.o = (lo >> 16) | (hi & 0xFFFF0000); + return out; +} + +// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 +u64 from_bit_interleaving(u32_2 in) { + u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); + u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); + u32 r0, r1; + u64 out; + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + out = (u64)hi << 32 | lo; + return out; +} + +void P(state *p, u8 rounds) { + state s = *p; + u32_2 t0, t1, t2, t3, t4; + u32 i, start = START_ROUND(rounds); + for (i = start; i < 12; i++) ROUND(constants[i][0], constants[i][1]); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h new file mode 100644 index 0000000..bc643ce --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h @@ -0,0 +1,71 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +typedef struct { + u32 e; + u32 o; +} u32_2; + +typedef struct { + u32_2 x0; + u32_2 x1; + u32_2 x2; + u32_2 x3; + u32_2 x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define START_ROUND(x) (12 - (x)) + +// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 +u32_2 to_bit_interleaving(u64 in); + +// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 +u64 from_bit_interleaving(u32_2 in); + +/* clang-format off */ +#define ROUND(C_e, C_o) \ + do { \ + /* round constant */ \ + s.x2.e ^= C_e; s.x2.o ^= C_o; \ + /* s-box layer */ \ + s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ + s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \ + s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \ + t0.e = s.x0.e; t0.o = s.x0.o; \ + t4.e = s.x4.e; t4.o = s.x4.o; \ + t3.e = s.x3.e; t3.o = s.x3.o; \ + t1.e = s.x1.e; t1.o = s.x1.o; \ + t2.e = s.x2.e; t2.o = s.x2.o; \ + s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \ + s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \ + s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \ + s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \ + s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \ + s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \ + s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \ + s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ + /* linear layer */ \ + t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \ + t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \ + t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \ + t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \ + t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \ + s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \ + s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \ + s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \ + s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \ + s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \ + s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \ + } while(0) +/* clang-format on */ + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.c new file mode 100644 index 0000000..48cac3d --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.c @@ -0,0 +1,64 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u64 i; + + while (len >= RATE) { + s->x0 ^= U64BIG(*(u64*)in); + if (mode != ASCON_AD) *(u64*)out = U64BIG(s->x0); + if (mode == ASCON_DEC) s->x0 = U64BIG(*((u64*)in)); + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + for (i = 0; i < len; ++i, ++out, ++in) { + s->x0 ^= INS_BYTE64(*in, i); + + if (mode != ASCON_AD) *out = EXT_BYTE64(s->x0, i); + if (mode == ASCON_DEC) { + s->x0 &= ~INS_BYTE64(0xff, i); + s->x0 |= INS_BYTE64(*in, i); + } + } + s->x0 ^= INS_BYTE64(0x80, len); +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + const u64 K0 = U64BIG(*(u64*)k); + const u64 K1 = U64BIG(*(u64*)(k + 8)); + const u64 N0 = U64BIG(*(u64*)npub); + const u64 N1 = U64BIG(*(u64*)(npub + 8)); + + // 
initialization + s->x0 = IV; + s->x1 = K0; + s->x2 = K1; + s->x3 = N0; + s->x4 = N1; + P(s, PA_ROUNDS); + s->x3 ^= K0; + s->x4 ^= K1; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4 ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x1 ^= K0; + s->x2 ^= K1; + P(s, PA_ROUNDS); + s->x3 ^= K0; + s->x4 ^= K1; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.h new file mode 100644 index 0000000..90076c1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (64 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif // CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c new file mode 100644 index 0000000..0cde81e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c @@ -0,0 +1,29 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | + (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c new file mode 100644 index 0000000..5961c60 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c @@ -0,0 +1,21 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + *(u64*)(c + mlen) = U64BIG(s.x3); + *(u64*)(c + mlen + 8) = U64BIG(s.x4); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c new file mode 100644 index 0000000..9aaf9d1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c @@ -0,0 +1,8 @@ +#include "permutations.h" + +void P(state *p, u8 rounds) { + state s = *p; + u8 i, start = START_CONSTANT(rounds); + for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h new file mode 100644 index 0000000..7143e82 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h @@ -0,0 +1,66 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned long long u64; + +typedef struct { + u64 x0, x1, x2, x3, x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) + +#define ROUND(C) \ + do { \ + state t; \ + s.x2 ^= C; \ + s.x0 ^= s.x4; \ + s.x4 ^= s.x3; \ + s.x2 ^= s.x1; \ + t.x0 = s.x0; \ + t.x4 = s.x4; \ + t.x3 = s.x3; \ + t.x1 = s.x1; \ + t.x2 = s.x2; \ + s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ + s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ + s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ + s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ + s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ + s.x1 ^= s.x0; \ + t.x1 = s.x1; \ + s.x1 = ROTR64(s.x1, 39); \ + s.x3 ^= s.x2; \ + t.x2 = s.x2; \ + s.x2 = ROTR64(s.x2, 1); \ + t.x4 = s.x4; \ + t.x2 ^= s.x2; \ + s.x2 = ROTR64(s.x2, 6 - 1); \ + t.x3 = s.x3; \ + t.x1 ^= s.x1; \ + s.x3 = ROTR64(s.x3, 10); \ + s.x0 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 7); \ + t.x3 ^= s.x3; \ + s.x2 ^= t.x2; \ + s.x1 = ROTR64(s.x1, 61 - 39); \ + t.x0 = s.x0; \ + s.x2 = ~s.x2; \ + s.x3 = ROTR64(s.x3, 17 - 10); \ + t.x4 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 41 - 7); \ + s.x3 ^= 
t.x3; \ + s.x1 ^= t.x1; \ + s.x0 = ROTR64(s.x0, 19); \ + s.x4 ^= t.x4; \ + t.x0 ^= s.x0; \ + s.x0 = ROTR64(s.x0, 28 - 19); \ + s.x0 ^= t.x0; \ + } while (0) + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h new file mode 100644 index 0000000..4b53d6c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 20 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.c new file mode 100644 index 0000000..88bcb45 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.c @@ -0,0 +1,67 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u64 i; + + while (len >= RATE) { + s->x0 ^= U64BIG(*(u64*)in); + if (mode != ASCON_AD) *(u64*)out = U64BIG(s->x0); + if (mode == ASCON_DEC) s->x0 = U64BIG(*((u64*)in)); + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + for (i = 0; i < len; ++i, ++out, ++in) { + s->x0 ^= INS_BYTE64(*in, i); + + if (mode != ASCON_AD) *out = EXT_BYTE64(s->x0, i); + if (mode == ASCON_DEC) { + s->x0 &= ~INS_BYTE64(0xff, i); + s->x0 |= INS_BYTE64(*in, i); + } + } + s->x0 ^= INS_BYTE64(0x80, len); +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + const u64 K0 = U64BIG(*(u64*)(k + 0)) >> 32; + const u64 K1 = U64BIG(*(u64*)(k + 4)); + const u64 K2 = U64BIG(*(u64*)(k + 12)); + const u64 N0 = U64BIG(*(u64*)npub); + const u64 N1 = U64BIG(*(u64*)(npub + 8)); + + // initialization + s->x0 = IV | K0; + s->x1 = K1; + s->x2 = K2; + s->x3 = N0; + s->x4 = N1; + P(s, PA_ROUNDS); + s->x2 ^= K0; + s->x3 ^= K1; + s->x4 ^= K2; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4 ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x1 ^= K0 << 32 | K1 >> 32; + s->x2 ^= K1 << 32 | K2 >> 32; + s->x3 ^= K2 << 32; + P(s, PA_ROUNDS); + s->x3 ^= K1; + s->x4 ^= K2; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.h new file mode 100644 index 0000000..90076c1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (64 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif 
// CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c new file mode 100644 index 0000000..0cde81e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c @@ -0,0 +1,29 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | + (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c new file mode 100644 index 0000000..5961c60 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c @@ -0,0 +1,21 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + *(u64*)(c + mlen) = U64BIG(s.x3); + *(u64*)(c + mlen + 8) = U64BIG(s.x4); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git 
a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c new file mode 100644 index 0000000..9aaf9d1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c @@ -0,0 +1,8 @@ +#include "permutations.h" + +void P(state *p, u8 rounds) { + state s = *p; + u8 i, start = START_CONSTANT(rounds); + for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h new file mode 100644 index 0000000..7143e82 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h @@ -0,0 +1,66 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned long long u64; + +typedef struct { + u64 x0, x1, x2, x3, x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) + +#define ROUND(C) \ + do { \ + state t; \ + s.x2 ^= C; \ + s.x0 ^= s.x4; \ + s.x4 ^= s.x3; \ + s.x2 ^= s.x1; \ + t.x0 = s.x0; \ + t.x4 = s.x4; \ + t.x3 = s.x3; \ + t.x1 = s.x1; \ + t.x2 = s.x2; \ + s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ + s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ + s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ + s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ + s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ + s.x1 ^= s.x0; \ + t.x1 = s.x1; \ + s.x1 = ROTR64(s.x1, 39); \ + s.x3 ^= s.x2; \ + t.x2 = s.x2; \ + s.x2 = ROTR64(s.x2, 1); \ + t.x4 = s.x4; \ + t.x2 ^= s.x2; \ + s.x2 = ROTR64(s.x2, 6 - 1); \ + t.x3 = s.x3; \ + t.x1 ^= s.x1; \ + s.x3 = ROTR64(s.x3, 10); \ + s.x0 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 7); \ + t.x3 ^= s.x3; \ + s.x2 ^= t.x2; \ + s.x1 = ROTR64(s.x1, 61 - 39); \ + t.x0 = s.x0; \ + s.x2 = ~s.x2; \ + s.x3 = ROTR64(s.x3, 17 - 10); \ + t.x4 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 41 - 7); \ + s.x3 ^= t.x3; \ + s.x1 ^= t.x1; \ + s.x0 = ROTR64(s.x0, 19); \ + s.x4 ^= t.x4; \ + t.x0 ^= s.x0; \ + s.x0 = ROTR64(s.x0, 28 - 19); \ + s.x0 ^= t.x0; \ + } while (0) + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
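+ *
+ * Round indices are cumulative across the whole forked computation:
+ * rounds [0, 21) are shared, rounds [21, 48) produce the right branch,
+ * and rounds [48, 75) produce the left branch, which is why every call
+ * below passes a (first, last) pair built from these two macros.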
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte
100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + 
.byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + 
.byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 
+ .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 
168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + 
.byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + 
.byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + 
.byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + 
lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + 
ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds 
+ .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov 
r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 
+ ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std 
Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi 
r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) 
+ elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std 
Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld 
r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + 
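; [annotation] this eor chain undoes the ForkSkinny-128-384 MixColumns (the movw block above rotated the rows back); the byte moves that follow are the inverse ShiftRows, after which the tweakey rows and the table_4 round constants are xored out and table_1 (the inverse S-box) is applied. +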
eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + 
mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd 
r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
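; [annotation] second half of the tweakey update in forkskinny_128_384_forward_tk: the TK3 rows (Y+17..Y+32) are swapped and run through table_3 (its LFSR), mirroring the table_2 pass applied to TK2 above. +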
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + 
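; [annotation] prologue of forkskinny_128_384_reverse_tk: r2-r17 and the Y pointer (r28/r29) are saved, sbiw r28,32 reserves the 32-byte local frame, and SREG is saved with interrupts disabled around the SPH/SPL update (.L__stack_usage = 50). +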
push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std 
Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + 
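; [annotation] matching epilogue: adiw r28,32 releases the local frame, the stack pointer is written back under cli with SREG restored via out 0x3f,r0, and the callee-saved registers are popped in reverse order before ret. +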
pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif 
defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z 
+#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap 
r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd 
r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + 
std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + 
andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
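To see why a word-level left rotate realizes a cell-level right shift here, note that the 128-bit state words are loaded little-endian, so cell 0 of a row sits in the least significant byte. A minimal standalone check (assuming leftRotate8 is the usual 32-bit left rotation used by this code):

#include <stdio.h>
#include <stdint.h>

#define leftRotate8(x) (((x) << 8) | ((x) >> 24))

int main(void) {
    /* One row [c0, c1, c2, c3] = [0x11, 0x22, 0x33, 0x44]; with
     * le_load_word32, c0 occupies the least significant byte. */
    uint32_t row = 0x44332211;
    /* Rotating the word left by 8 sends each cell to the next higher
     * byte, i.e. one column to the right: [c3, c0, c1, c2]. */
    printf("%08lx\n", (unsigned long)leftRotate8(row)); /* prints 33221144 */
    return 0;
}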
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
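The inverse column mix in this refactored loop can be checked against the forward mix from forkskinny_128_256_rounds with a few lines of standalone C:

#include <stdio.h>
#include <stdint.h>

int main(void) {
    uint32_t s0 = 0x01234567, s1 = 0x89abcdef, s2 = 0x0f1e2d3c, s3 = 0x4b5a6978;
    uint32_t a0 = s0, a1 = s1, a2 = s2, a3 = s3, temp;

    /* Forward mix of the columns */
    s1 ^= s2; s2 ^= s0; temp = s3 ^ s2; s3 = s2; s2 = s1; s1 = s0; s0 = temp;

    /* Inverse mix of the columns, as in the inv_rounds loop */
    temp = s0; s0 = s1; s1 = s2; s2 = s3; s3 = temp ^ s2; s2 ^= s0; s1 ^= s2;

    printf("%d\n", s0 == a0 && s1 == a1 && s2 == a2 && s3 == a3); /* prints 1 */
    return 0;
}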
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
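This shortcut rests on two facts about the tweakey schedule that are easy to verify offline. A standalone sketch (assuming PT below is the standard SKINNY tweakey cell permutation that skinny128_permute_tk implements, i.e. cell i takes old cell PT[i]):

#include <stdio.h>

static const int PT[16] = {9, 15, 8, 13, 10, 14, 12, 11,
                           0, 1, 2, 3, 4, 5, 6, 7};

int main(void) {
    int cells[16], next[16], top_visits[16] = {0}, i, round, ok = 1;
    for (i = 0; i < 16; ++i)
        cells[i] = i;
    for (round = 0; round < 16; ++round) {
        for (i = 0; i < 16; ++i)
            next[i] = cells[PT[i]];
        for (i = 0; i < 16; ++i)
            cells[i] = next[i];
        /* The LFSRs only ever touch the two top rows (cells 0..7) */
        for (i = 0; i < 8; ++i)
            ++top_visits[cells[i]];
    }
    for (i = 0; i < 16; ++i)
        ok &= (cells[i] == i) && (top_visits[i] == 8);
    /* Prints 1: PT has period 16, and each cell spends exactly 8 of those
     * 16 rounds in the top half, so skipping a 16-round block needs no
     * permutation calls and exactly 8 LFSR applications per word. */
    printf("%d\n", ok);
    return 0;
}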
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB.
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
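Note the flipped convention in the 64-bit variant relative to ForkSkinny-128: rows are loaded with be_load_word16 and held in big-endian nibble order, so cell 0 sits in the top nibble and a cell-level right shift becomes a word-level right rotate (hence the rightRotate4_16 calls in the round function below, where the 128-bit code used leftRotate8). A minimal check, assuming rightRotate4_16 is the usual 16-bit rotation:

#include <stdio.h>
#include <stdint.h>

#define rightRotate4_16(x) ((uint16_t)(((x) >> 4) | ((x) << 12)))

int main(void) {
    /* Row [c0, c1, c2, c3] = [1, 2, 3, 4], with cell 0 in the top nibble */
    uint16_t row = 0x1234;
    /* Moving cells one column right now means moving toward the LSB,
     * i.e. a word-level right rotate: [c3, c0, c1, c2] = 0x4123 */
    printf("%04x\n", (unsigned)rightRotate4_16(row)); /* prints 4123 */
    return 0;
}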
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
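In other words, both directions treat the round range as half-open: rounds(state, first, last) applies rounds first..last-1, while inv_rounds(state, first, last) counts down and applies the inverses of rounds first-1..last. A usage sketch (hypothetical helper; it assumes the state was already loaded with the tweakey and block, and FORKSKINNY_128_256_ROUNDS_BEFORE is the value 21 defined in forkae.c):

static void roundtrip(forkskinny_128_256_state_t *state)
{
    /* Apply rounds 0..20 ("last" is one past the final round) */
    forkskinny_128_256_rounds(state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE);

    /* "first" is one past the first round to invert, so this undoes
     * rounds 20..0 and restores *state (block and tweakey) exactly */
    forkskinny_128_256_inv_rounds(state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0);
}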
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
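The big-endian note for the 64-bit variant matters when unpacking blocks: each 16-bit row of the state holds four nibbles, most significant byte first. A hypothetical helper showing the intended mapping, on the assumption that be_load_word16() from internal-util.h is the usual ((p[0] << 8) | p[1]) load and that <stdint.h> is available:

/* Illustration only: unpack an 8-byte block into four big-endian rows. */
static void load_block_64(uint16_t S[4], const unsigned char *in)
{
    unsigned i;
    for (i = 0; i < 4; ++i)
        S[i] = ((uint16_t)in[2 * i] << 8) | in[2 * i + 1];
}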
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include <string.h> aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking.
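The _half variants of the permutation macros exist because a SKINNY-128 round only consumes the top two rows of each tweakey array when the round tweakey is added, so the bottom half can be permuted lazily. As a rough sketch of the consumer side (not the library's actual round function; uint32_t from <stdint.h> is assumed), the per-round tweakey addition touches rows 0 and 1 only:

/* Sketch: round-tweakey addition for the two-array (TK1/TK2) case.
 * Rows 2 and 3 never enter the state directly, which is what makes
 * skinny128_permute_tk_half() worthwhile. */
static void add_round_tweakey(uint32_t S[4], const uint32_t TK1[4], const uint32_t TK2[4])
{
    S[0] ^= TK1[0] ^ TK2[0];
    S[1] ^= TK1[1] ^ TK2[1];
}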
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
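At the call sites, the two output pointers select which branch of the fork gets computed; passing NULL for output_left skips the left branch entirely, as the pointer checks above show. A minimal usage sketch, where key, input, left and right are assumed to be caller-supplied buffers of 48, 16, 16 and 16 bytes:

forkskinny_128_384_encrypt(key, left, right, input);   /* both branches */
forkskinny_128_384_encrypt(key, NULL, right, input);   /* right branch only */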
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 +
.byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 
223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 
84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + 
.byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 
163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + 
.byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 
+ .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + 
.byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + 
ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global 
forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm 
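+ ; the bare "lpm" form always loads into r0, which the next mov copies out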
+ mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif 
defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + 
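; tweakey layout (per the ForkSkinny spec): TK1 (bytes 0..15) stays in registers, TK2 (bytes 16..31) is parked on the stack frame +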
ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 
+ ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd 
r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd 
r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor 
r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + 
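; as in the 256-bit variant, TK1 stays in registers; TK2 and TK3 (bytes 16..47) are staged on the stack frame +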
ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd 
r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function 
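+ ; steps the ForkSkinny-128-384 tweakey schedule backwards; per the avr-gcc ABI, r25:r24 carries the tweakey pointer and r22 the round count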
+forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
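+ ; lpm Rd,Z form, available on cores with the enhanced instruction set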
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std 
Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or 
r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + 
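+; Note: r22 carries the round counter pre-scaled by two because table_4
+; stores the round constant as separate nibbles; decrementing it before each
+; lookup mirrors rc = RC[--first] in the portable C code, with swap moving
+; each nibble into the high half of the row word before it is XORed in.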
dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw 
r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + 
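+; Note: ForkSkinny-64-192 packs two 4-bit cells into each byte, which is why
+; the tweakey permutation in the loop above is built from swap/andi/or nibble
+; shuffles rather than byte moves; the updated TK1, TK2 and TK3 rows are
+; being written back to the key schedule here.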
std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push 
r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
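+ * (Applying skinny128_permute_tk() sixteen times is the identity permutation, so over a full cycle only the LFSR steps have any effect.)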
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
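+ * (Each tweakey cell sits in the two LFSR rows for exactly half of the 16-round permutation cycle, which is why eight LFSR applications per word are enough.)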
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
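Spelled out with the ROUNDS_BEFORE/ROUNDS_AFTER constants used in this diff (21/27 for ForkSkinny-128-256, 25/31 for ForkSkinny-128-384, 17/23 for ForkSkinny-64-192), the encrypt paths walk three half-open round ranges: the common prefix, the right branch, and the left branch after the branching constant is XORed in. A small standalone sketch (not part of the patch) that tabulates those ranges, handy when cross-checking round-constant indexing:

#include <stdio.h>

int main(void)
{
    static const struct { const char *name; int before, after; } v[] = {
        {"ForkSkinny-128-256", 21, 27},
        {"ForkSkinny-128-384", 25, 31},
        {"ForkSkinny-64-192",  17, 23},
    };
    for (int i = 0; i < 3; ++i) {
        int b = v[i].before, a = v[i].after;
        printf("%s: common [0,%d), right [%d,%d), left [%d,%d), "
               "%d rounds total\n",
               v[i].name, b, b, b + a, b + a, b + 2 * a, b + 2 * a);
    }
    return 0;
}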
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
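These prototypes use half-open ranges: rounds(state, first, last) applies rounds first through last-1, and inv_rounds(state, first, last) undoes rounds first-1 down through last, which is why the parameter docs say "plus 1". A usage sketch (not part of the patch; it assumes compilation and linking against internal-forkskinny.c, and uses an arbitrary all-zero tweakey and block) showing how the calls chain and cancel:

#include <assert.h>
#include <string.h>
#include "internal-forkskinny.h"

int main(void)
{
    forkskinny_128_256_state_t s, saved;
    memset(&s, 0, sizeof(s));           /* arbitrary tweakey and block */
    saved = s;

    /* rounds(s, 0, 21) then rounds(s, 21, 48) covers rounds 0..47 exactly
     * once; this is how the forking encrypt path chains its calls. */
    forkskinny_128_256_rounds(&s, 0, 21);
    forkskinny_128_256_rounds(&s, 21, 48);

    /* inv_rounds takes the same bounds swapped and undoes rounds 47..0,
     * tweakey schedule included. */
    forkskinny_128_256_inv_rounds(&s, 48, 0);

    assert(memcmp(&saved, &s, sizeof(s)) == 0);
    return 0;
}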
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include <string.h> aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking.
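The constants XORed at the fork below (0x08040201/0x82412010/0x28140a05/0x8844a251 for the 128-bit block, 0x1249/0x36da/0x5b7f/0xec81 for the 64-bit block) are the ForkSkinny branching constants. Their byte and nibble sequences follow small LFSR recurrences; the recurrences below are inferred from the literals themselves, consistent with how the ForkAE specification generates its branch constants. A standalone sketch (not part of the patch) that regenerates and checks them:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* 128-bit block: 8-bit LFSR next = (x << 1) | (bit7 ^ bit5), seeded
     * with 0x01; bytes are packed little-endian into the state words. */
    uint32_t w128[4] = {0, 0, 0, 0};
    uint8_t x = 0x01;
    int i;
    for (i = 0; i < 16; ++i) {
        w128[i / 4] |= (uint32_t)x << (8 * (i % 4));
        x = (uint8_t)((x << 1) | (((x >> 7) ^ (x >> 5)) & 1));
    }
    assert(w128[0] == 0x08040201U && w128[1] == 0x82412010U);
    assert(w128[2] == 0x28140a05U && w128[3] == 0x8844a251U);

    /* 64-bit block: 4-bit LFSR next = (x << 1) | (bit3 ^ bit2), seeded
     * with 0x1; nibbles are packed big-endian into the state words. */
    uint16_t w64[4] = {0, 0, 0, 0};
    uint8_t y = 0x1;
    for (i = 0; i < 16; ++i) {
        w64[i / 4] |= (uint16_t)(y << (4 * (3 - i % 4)));
        y = ((y << 1) & 0xF) | (((y >> 3) ^ (y >> 2)) & 1);
    }
    assert(w64[0] == 0x1249 && w64[1] == 0x36da);
    assert(w64[2] == 0x5b7f && w64[3] == 0xec81);

    printf("branching constants match the literals in this file\n");
    return 0;
}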
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 +
.byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 
223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 
84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + 
.byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 
163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + 
.byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 
+ .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + 
.byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + 
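The round loop starting at label 86 above is one unrolled ForkSkinny-128 round on the 16-byte state held in r2..r15/r24/r25: SubCells via the table_0 S-box in program memory, AddRoundTweakey XORing the top two rows with TK1 ^ TK2 from the Y frame, AddConstants from a table_4 byte pair plus the fixed 0x02, then ShiftRows and MixColumns as register moves and eors. A minimal C model of the same round, assuming the row-major byte order the loads above establish (names such as skinny128_round_sketch are illustrative, not part of this code):

    #include <stdint.h>

    typedef struct { uint8_t b[4]; } row_t; /* one 4-cell row */

    static void skinny128_round_sketch(row_t s[4],
                                       const uint8_t sbox[256],  /* table_0 */
                                       const uint8_t *tk,        /* TK1^TK2, rows 0-1 */
                                       uint8_t rc0, uint8_t rc1) /* table_4 pair */
    {
        /* SubCells: byte-wise lookup (the lpm/elpm runs above) */
        for (int r = 0; r < 4; r++)
            for (int c = 0; c < 4; c++)
                s[r].b[c] = sbox[s[r].b[c]];
        /* AddRoundTweakey: only rows 0-1 absorb tweakey material */
        for (int c = 0; c < 4; c++) {
            s[0].b[c] ^= tk[c];
            s[1].b[c] ^= tk[4 + c];
        }
        /* AddConstants: the fixed 0x02s land where the eor r10/eor r4 pair puts them */
        s[0].b[0] ^= rc0;
        s[1].b[0] ^= rc1;
        s[2].b[0] ^= 0x02;
        s[0].b[2] ^= 0x02;
        /* ShiftRows: rows 1,2,3 rotate right by 1,2,3 cells */
        for (int r = 1; r < 4; r++) {
            row_t t = s[r];
            for (int c = 0; c < 4; c++)
                s[r].b[c] = t.b[(c - r + 4) & 3];
        }
        /* MixColumns: per column, (s0,s1,s2,s3) -> (s0^s2^s3, s0, s1^s2, s0^s2) */
        for (int c = 0; c < 4; c++) {
            uint8_t s0 = s[0].b[c], s1 = s[1].b[c],
                    s2 = s[2].b[c], s3 = s[3].b[c];
            s[0].b[c] = s0 ^ s2 ^ s3;
            s[1].b[c] = s0;
            s[2].b[c] = s1 ^ s2;
            s[3].b[c] = s0 ^ s2;
        }
    }

The assembly continues below with the per-round tweakey update: TK1 is permuted in place on the stack, and TK2 is permuted while being pushed through the table_2 byte map.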
ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global 
forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm 
+ mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif 
defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + 
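A note on the inverse path that closed above before this next helper: forkskinny_128_256_inv_rounds is the same round run backwards for ForkSkinny's decryption/reconstruction side. Each iteration steps the tweakey schedule one round back (its table pointer starts at table_3 rather than table_2), undoes MixColumns and ShiftRows with the mirrored register moves, strips the round tweakey and constants, and finally pushes the state through table_1, which holds the inverse S-box. Every lookup sits in the same four-way conditional used throughout this file: elpm behind a saved-and-restored RAMPZ on large-flash parts, lpm rd,Z where available, a plain ld on __AVR_TINY__, and the two-instruction lpm fallback otherwise. A C sketch of the inverted linear layer, obtained by solving the forward equations (illustrative names again):

    #include <stdint.h>

    typedef struct { uint8_t b[4]; } row_t;

    /* Undo MixColumns, then ShiftRows, mirroring the inv_rounds body. */
    static void inv_linear_layer_sketch(row_t s[4])
    {
        /* inverse MixColumns: recover the pre-round rows */
        for (int c = 0; c < 4; c++) {
            uint8_t y0 = s[0].b[c], y1 = s[1].b[c],
                    y2 = s[2].b[c], y3 = s[3].b[c];
            s[0].b[c] = y1;           /* old s0 */
            s[1].b[c] = y1 ^ y2 ^ y3; /* old s1 */
            s[2].b[c] = y1 ^ y3;      /* old s2 */
            s[3].b[c] = y0 ^ y3;      /* old s3 */
        }
        /* inverse ShiftRows: rotate rows 1,2,3 left by 1,2,3 cells */
        for (int r = 1; r < 4; r++) {
            row_t t = s[r];
            for (int c = 0; c < 4; c++)
                s[r].b[c] = t.b[(c + r) & 3];
        }
    }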
ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 
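The loop at label 51 above is the heart of forkskinny_128_256_forward_tk: it advances the tweakey schedule by r22 rounds without touching the cipher state, which is what makes repositioning between the two forks cheap. Per step, both 16-byte tweakey words go through the SKINNY cell permutation, and the permuted top half of TK2 is additionally mapped through table_2, the byte table for SKINNY-128's LFSR2. A C rendering of one step, with the permutation read off the register moves above (function names are illustrative):

    #include <stdint.h>
    #include <string.h>

    static const uint8_t PT[16] = {9, 15, 8, 13, 10, 14, 12, 11,
                                   0, 1,  2,  3,  4,  5,  6,  7};

    /* table_2 entry, computed here instead of looked up */
    static uint8_t lfsr2(uint8_t x)
    {
        return (uint8_t)((x << 1) | (((x >> 7) ^ (x >> 5)) & 1));
    }

    static void forward_tk_step_sketch(uint8_t tk1[16], uint8_t tk2[16])
    {
        uint8_t t[16];
        /* TK1: permutation only, never an LFSR */
        for (int i = 0; i < 16; i++) t[i] = tk1[PT[i]];
        memcpy(tk1, t, 16);
        /* TK2: permute, then LFSR the cells that land in rows 0-1 */
        for (int i = 0; i < 16; i++) t[i] = tk2[PT[i]];
        for (int i = 0; i < 8; i++) t[i] = lfsr2(t[i]);
        memcpy(tk2, t, 16);
    }

forkskinny_128_256_reverse_tk below is the exact mirror: it applies the inverse permutation and uses table_3 in place of table_2 to step TK2 backwards.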
+ ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd 
r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd 
r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor 
r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + 
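The _384 routines in this stretch follow the same pattern for ForkSkinny-128-384's three-word tweakey: the state lives at Z+48..63, rows 0-1 absorb TK1 ^ TK2 ^ TK3 each round, and the schedule drives TK2 through table_2 (LFSR2) and TK3 through table_3 (LFSR3). A property worth noting, and the reason the inverse routines above can reuse the same two tables with the roles swapped (table_3 for TK2, table_2 for TK3): on 8-bit cells, LFSR3 is exactly the inverse of LFSR2. A small C self-check of that relationship (illustrative, not part of this code):

    #include <assert.h>
    #include <stdint.h>

    static uint8_t lfsr2(uint8_t x) /* table_2 */
    {
        return (uint8_t)((x << 1) | (((x >> 7) ^ (x >> 5)) & 1));
    }

    static uint8_t lfsr3(uint8_t x) /* table_3: also lfsr2's inverse */
    {
        return (uint8_t)((x >> 1) | (((x << 7) ^ (x << 1)) & 0x80));
    }

    int main(void)
    {
        for (int x = 0; x < 256; x++) {
            assert(lfsr3(lfsr2((uint8_t)x)) == (uint8_t)x);
            assert(lfsr2(lfsr3((uint8_t)x)) == (uint8_t)x);
        }
        return 0;
    }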
ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd 
r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function 
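+; Mirror of forkskinny_128_384_forward_tk above: TK1 again stays in registers
+; while TK2 and TK3 are cached in the Y-frame, but the cell shuffle runs in
+; the inverse direction.  The two lookup tables also swap roles here: for the
+; 8-bit SKINNY cell LFSRs, LFSR3 is the inverse of LFSR2 (and vice versa), so
+; undoing TK2 reads table_3 and undoing TK3 reads table_2 (assuming table_2
+; and table_3 are the byte-wise LFSR2/LFSR3 tables, as their use in the
+; forward routine suggests).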
+forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std 
Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or 
r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + 
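+; Round-constant step for the inverse round: r22 was pre-doubled by the
+; "lsl r22" in the prologue, so each iteration walks it back across two
+; table_4 entries, XORing one into the high byte of each of the two top
+; state rows (table_4 presumably holds the expanded round constants; the
+; "swap" moves each looked-up value into the high nibble before the XOR).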
dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw 
r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + 
std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push 
r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB.
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
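 + * (After a full 16 rounds the cell permutation returns to the
 + * identity, so only the LFSR updates need to be applied.)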
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
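 + * (As in the forward direction, the cell ordering is unchanged after
 + * a full 16-round block, so only the inverse LFSR updates are needed.)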
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
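 + *
 + * The range is half-open: rounds first, first + 1, ..., last - 1 are
 + * applied. For example, forkskinny_128_256_rounds(state, 0, 21) performs
 + * all of the rounds before the forking point of ForkSkinny-128-256.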
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include <string.h> aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking.
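 + *
 + * Together with FORKSKINNY_128_256_ROUNDS_BEFORE, this determines the
 + * complete schedule of 21 + 2 * 27 = 75 rounds, which is how far the
 + * decryption path below fast-forwards the tweakey.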
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 
+ .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 
22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 
86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + 
.byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 
168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + 
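+ ; NOTE (annotation, assumed): table_4 holds what look like two
+ ; round-constant bytes per round (174 bytes = 87 pairs), matching the two
+ ; RC lookups per iteration in the round functions below; table_5 and
+ ; table_6 (continuing here) appear to be the 4-bit SKINNY S-box and its
+ ; inverse applied to both nibbles of a byte, for the 64-bit code paths.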
.byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + 
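+ ; NOTE (annotation, assumed): table_7 appears to apply the nibble-wide
+ ; TK2 LFSR, x -> (x << 1) ^ (bit3(x) ^ bit2(x)), to both nibbles of a byte.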
.byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + 
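+ ; NOTE (annotation, assumed): table_8 appears to apply the nibble-wide
+ ; TK3 LFSR, x -> (x >> 1) ^ ((bit0(x) ^ bit3(x)) << 3), to both nibbles.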
.byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + 
lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + 
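+ ; NOTE (annotation, assumed): the bytes being loaded around this point are
+ ; one half of TK2; they appear to be pushed through table_2 (TK2 LFSR) and
+ ; swapped with the other half, i.e. the forward PT tweakey-schedule step.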
ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds 
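+ ; NOTE (annotation, assumed): decrypt-side counterpart of
+ ; forkskinny_128_256_rounds; it walks the round index downwards, undoes
+ ; the linear layer first, applies table_1 (inverse S-box) last, and steps
+ ; the tweakey schedule backwards via table_3.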
+ .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov 
r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 
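+ ; NOTE (annotation, assumed): forkskinny_128_256_forward_tk fast-forwards
+ ; only the tweakey schedule by the round count in r22 (no state rounds),
+ ; as ForkSkinny needs when jumping from the fork point to the second
+ ; branch; TK1 stays in registers, TK2 sits on the stack and is filtered
+ ; through table_2.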
+ ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std 
Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
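+ ; NOTE (annotation, assumed): this loop body rewinds the tweakey schedule
+ ; one round per iteration, applying the inverse PT permutation and
+ ; table_3, the inverse of the TK2 LFSR.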
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi 
r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) 
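+ ; NOTE (annotation, assumed): ForkSkinny-128-384 carries a third tweakey
+ ; block; the lookups here appear to push the permuted TK3 rows through
+ ; table_3 (TK3 LFSR).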
+ elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std 
Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld 
r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + 
eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + 
mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd 
r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + 
push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std 
Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + 
pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif 
defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z 
+#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap 
r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd 
r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + 
std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + 
andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
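The fast-forward shortcut in forkskinny_128_256_forward_tk leans on two properties of the SKINNY tweakey cell permutation: it has order 16, and over those 16 rounds every cell spends exactly 8 rounds in the two rows the LFSR touches, so applying each LFSR eight times to all four words is equivalent to 16 genuine schedule steps. A throwaway check of both claims; the PT table is the standard SKINNY tweakey permutation, restated here by hand:

#include <assert.h>

/* SKINNY tweakey permutation PT: after a round, the cell at
 * position PT[i] of the old state sits at position i */
static const int PT[16] = {9, 15, 8, 13, 10, 14, 12, 11,
                           0, 1, 2, 3, 4, 5, 6, 7};

int main(void)
{
    int pos[16], next[16], top[16] = {0};
    int i, r;
    for (i = 0; i < 16; ++i)
        pos[i] = i;                 /* pos[i] = cell currently at position i */
    for (r = 0; r < 16; ++r) {
        for (i = 0; i < 8; ++i)
            ++top[pos[i]];          /* positions 0-7 are the LFSR-ed rows */
        for (i = 0; i < 16; ++i)
            next[i] = pos[PT[i]];
        for (i = 0; i < 16; ++i)
            pos[i] = next[i];
    }
    for (i = 0; i < 16; ++i) {
        assert(pos[i] == i);        /* the permutation has order 16 */
        assert(top[i] == 8);        /* each cell: 8 of 16 rounds in rows 0-1 */
    }
    return 0;
}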
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
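The rotation trick described in the comments works because each row is loaded with le_load_word32, so cell 0 of a row sits in the least significant byte and a left rotate of the word moves every cell one position to the right. A tiny illustration, with leftRotate8 restated locally rather than pulled from internal-util.h:

#include <assert.h>
#include <stdint.h>

static uint32_t leftRotate8(uint32_t x)
{
    return (x << 8) | (x >> 24);
}

int main(void)
{
    /* Row holding cells c0..c3; c0 is the least significant byte
     * because rows are loaded little-endian */
    uint32_t row = 0x33221100U;     /* c0=0x00, c1=0x11, c2=0x22, c3=0x33 */
    row = leftRotate8(row);
    /* Each cell has moved one position to the right (cell i -> i+1 mod 4) */
    assert(row == 0x22110033U);
    return 0;
}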
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
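skinny128_LFSR2 advances all four 8-bit cells of a row in one 32-bit operation. Below is a cross-check of a bit-sliced form against the cell-level definition from the SKINNY specification, (x7...x0) -> (x6...x0, x7^x5). Both functions are restated locally so the test stands alone; the repository macro may be written differently while computing the same map:

#include <assert.h>
#include <stdint.h>

/* Cell-level TK2 LFSR from the SKINNY spec */
static uint8_t lfsr2_cell(uint8_t x)
{
    return (uint8_t)((x << 1) | (((x >> 7) ^ (x >> 5)) & 1));
}

/* Bit-sliced form: all four cells of a row at once */
static uint32_t lfsr2_row(uint32_t x)
{
    return ((x << 1) & 0xFEFEFEFEU) ^ (((x >> 7) ^ (x >> 5)) & 0x01010101U);
}

int main(void)
{
    uint32_t c;
    for (c = 0; c < 256; ++c) {
        uint32_t row = c * 0x01010101U;   /* same cell in all 4 positions */
        assert(lfsr2_row(row) == lfsr2_cell((uint8_t)c) * 0x01010101U);
    }
    return 0;
}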
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
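The single-round functions being removed here map onto the new multi-round interface as intervals of length one, so any caller that previously stepped one round at a time can still do so. A hedged sketch of the correspondence; the wrapper names are mine:

#include <string.h>
#include "internal-forkskinny.h"

/* One forward round, as the removed forkskinny_64_192_round did */
static void one_round(forkskinny_64_192_state_t *state, unsigned round)
{
    forkskinny_64_192_rounds(state, round, round + 1);
}

/* One inverse round, as the removed forkskinny_64_192_inv_round did */
static void one_inv_round(forkskinny_64_192_state_t *state, unsigned round)
{
    forkskinny_64_192_inv_rounds(state, round + 1, round);
}

int main(void)
{
    forkskinny_64_192_state_t s;
    memset(&s, 0, sizeof(s));
    one_round(&s, 0);       /* round 0 forward ... */
    one_inv_round(&s, 0);   /* ... and back again */
    return 0;
}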
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
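The fast-forward above can cancel the permutations, whose period is 16, but never the LFSR steps: the 4-bit TK2 LFSR of SKINNY-64 is maximal-period, cycling through all 15 nonzero cell values, so the eight applications per 16 rounds always have to be performed. A quick period computation, with the cell-level LFSR restated from the SKINNY specification:

#include <assert.h>
#include <stdint.h>

/* TK2 nibble LFSR: (x3 x2 x1 x0) -> (x2 x1 x0, x3^x2) */
static uint8_t lfsr2_nibble(uint8_t x)
{
    return (uint8_t)(((x << 1) & 0xE) | (((x >> 3) ^ (x >> 2)) & 1));
}

int main(void)
{
    uint8_t x = 1;
    unsigned period = 0;
    do {
        x = lfsr2_nibble(x);
        ++period;
    } while (x != 1);
    assert(period == 15);   /* maximal period for a 4-bit LFSR */
    return 0;
}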
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
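With 21 rounds before the fork and 27 after it, the full ForkSkinny-128-256 schedule runs 21 + 27 * 2 = 75 rounds. A usage sketch of the forking contract implemented by the wrappers below, as I read them: encryption fans one input block out into two output blocks, a NULL left output skips the extra leg, and decrypting the left block recovers both the plaintext and the sibling right block. The key and input bytes are arbitrary placeholders:

#include <assert.h>
#include <string.h>
#include "internal-forkskinny.h"

int main(void)
{
    unsigned char key[32], input[16];
    unsigned char left[16], right[16], right2[16];
    unsigned char plain[16], sibling[16];
    memset(key, 0x2A, sizeof(key));     /* arbitrary placeholder bytes */
    memset(input, 0x3C, sizeof(input));

    /* Encrypt: one input block fans out into two output blocks */
    forkskinny_128_256_encrypt(key, left, right, input);

    /* Passing NULL for the left output computes the right leg only */
    forkskinny_128_256_encrypt(key, 0, right2, input);
    assert(memcmp(right, right2, 16) == 0);

    /* Decrypting the left block walks back to the forking point,
     * recovers the plaintext, and re-derives the right block */
    forkskinny_128_256_decrypt(key, plain, sibling, left);
    assert(memcmp(plain, input, 16) == 0);
    assert(memcmp(sibling, right, 16) == 0);
    return 0;
}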
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 
100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + 
.byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + 
.byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 
+ .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 
168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + 
.byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + 
.byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + 
.byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + 
lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + 
ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds 
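 /* Editor's sketch of the routine below, inferred from the generated code and from the C helper's (state, first, last) contract shown above for the 64-192 variant; this comment is not part of the auto-generated source. forkskinny_128_256_inv_rounds undoes rounds first-1 down to last of ForkSkinny-128-256. Under the avr-gcc calling convention, Z (r25:r24) points at the state structure, r22 carries "first" and r20 carries "last"; both counters are doubled (lsl) so they index the two-bytes-per-round constant table (table_4) directly, and the loop consumes that table backwards via the paired dec r22 lookups. The 16-byte cipher state at Z+32..Z+47 is held in registers for the whole loop, while the 32-byte tweakey at Z+0..Z+31 is staged in the Y stack frame and permuted in place, mirroring the forward routine above. */ 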
+ .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov 
r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 
+ ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std 
Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi 
r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) 
+ elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std 
Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld 
r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + 
eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + 
mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd 
r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + 
push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std 
Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + 
pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif 
defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z 
+#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap 
r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd 
r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + 
std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + 
andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
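(The tweakey permutation swaps the two halves of the state every + * round and returns each cell to its starting position after 16 rounds, + * so only the LFSR contributions remain.)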
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
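(The same cancellation as for ForkSkinny-128-256 applies; TK3 takes + * skinny128_LFSR3() on the same schedule as TK2 takes skinny128_LFSR2().)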
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
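(Running the schedule backwards, the same argument holds with the + * inverse LFSRs.)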
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
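(The 64-bit variant permutes its 4-bit cells in the same pattern, so + * skinny64_inv_permute_tk() likewise has order 16.)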
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
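+ * + * The range is half-open; forkae.c, for example, runs the pre-fork + * rounds as forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE).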
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
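+ * + * With 21 rounds before the fork and 27 after, producing both output + * blocks takes 21 + 2 * 27 = 75 rounds in total.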
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + 
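/* table_0 is the 8-bit SKINNY-128 S-box, kept in flash and indexed with lpm/elpm by the round code below; table_1, which follows, is its inverse for the decryption direction */ +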
.byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 
223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 
84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + 
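/* table_2 and table_3 tabulate one step of the TK2 and TK3 byte LFSRs for every possible cell value; table_4, further below, stores the 7-bit round constants split into a low/high pair per round (87 pairs, matching the 25 + 31 * 2 rounds of ForkSkinny-128-384) */ +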
.byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 
163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + 
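/* table_5 and table_6 are the 4-bit S-box and inverse S-box for ForkSkinny-64-192, applied to both nibbles of a byte so a single lookup substitutes two cells */ +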
.byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 
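+ /* table_7 steps the TK2 nibble LFSR on both halves of each byte at once */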
+ .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + 
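/* table_8 does the same for the TK3 nibble LFSR */ +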
.byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + 
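/* rows 2-3 of TK2 (bytes Y+25..Y+32) are loaded here, pushed through the LFSR2 table just selected into Z, and written back as the new rows 0-1 of the PT tweakey permutation */ +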
ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global 
forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm 
+ mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif 
defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + 
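/* the forward tweakey schedule keeps all 16 bytes of TK1 in registers, while TK2 is staged into the Y stack frame just below */ +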
ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 
+ ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd 
r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd 
r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor 
r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + 
ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd 
r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function 
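+; forkskinny_128_384_reverse_tk: annotation (inferred, not in the +; generated source). Per the avr-gcc ABI, r25:r24 points at the 48-byte +; tweakey; TK1 stays in registers r2-r17 while TK2/TK3 are staged on +; the stack frame, and r22 counts schedule steps to unwind. Each pass +; applies the inverse cell permutation to all three tweakeys, then maps +; TK2 bytes through table_3 and TK3 bytes through table_2, the reverse +; of forward_tk's pairing, presumably because the two SKINNY byte LFSRs +; invert each other. TK1 has no LFSR and is only permuted.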
+forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std 
Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or 
r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + 
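; inverse round-constant layer (annotation, inferred): r22 runs at +; twice the round index, so the two backward steps below pull this +; round's pair of table_4 bytes back out; each is swapped into the +; high nibbles to match the packed 4-bit cell layout, and the fixed +; 0x20 term is stripped the same way. table_6, presumably the inverse +; of the doubled 4-bit S-box in table_5, then maps the state back. + 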
dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw 
r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + 
std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push 
r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
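The forward "mix the columns" and the "inverse mix of the columns" sequences above are exact mutual inverses, which is easy to confirm in isolation. A standalone sketch in plain C (the function and variable names here are illustrative, not from the library):

#include <assert.h>
#include <stdint.h>

/* Forward column mix, exactly as in forkskinny_128_256_rounds() */
static void mix(uint32_t s[4])
{
    uint32_t temp;
    s[1] ^= s[2];
    s[2] ^= s[0];
    temp = s[3] ^ s[2];
    s[3] = s[2];
    s[2] = s[1];
    s[1] = s[0];
    s[0] = temp;
}

/* Inverse column mix, exactly as in forkskinny_128_256_inv_rounds() */
static void inv_mix(uint32_t s[4])
{
    uint32_t temp = s[0];
    s[0] = s[1];
    s[1] = s[2];
    s[2] = s[3];
    s[3] = temp ^ s[2];
    s[2] ^= s[0];
    s[1] ^= s[2];
}

int main(void)
{
    uint32_t s[4] = { 0xdeadbeefU, 0x01234567U, 0x89abcdefU, 0x42424242U };
    uint32_t t[4] = { s[0], s[1], s[2], s[3] };
    mix(s);
    inv_mix(s);  /* must restore the original state */
    assert(s[0] == t[0] && s[1] == t[1] && s[2] == t[2] && s[3] == t[3]);
    return 0;
}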
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
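The 16-round shortcut used by forkskinny_128_256_forward_tk() and forkskinny_128_256_reverse_tk() above rests on the fact that skinny128_permute_tk() has order 16: after sixteen schedule steps every tweakey cell is back in its original position, having passed through the LFSR exactly eight times along the way. A minimal self-check sketch, assuming the helpers from this file and internal-skinnyutil.h are in scope (the checker name itself is hypothetical):

static int forkskinny_128_256_tk16_agrees(forkskinny_128_256_state_t s)
{
    forkskinny_128_256_state_t fast = s, slow = s;
    unsigned i;
    int ok = 1;
    forkskinny_128_256_forward_tk(&fast, 16);  /* shortcut: 8 LFSR steps per TK2 word */
    for (i = 0; i < 16; ++i) {                 /* naive path: one schedule step per round */
        skinny128_permute_tk(slow.TK1);
        skinny128_permute_tk(slow.TK2);
        skinny128_LFSR2(slow.TK2[0]);
        skinny128_LFSR2(slow.TK2[1]);
    }
    for (i = 0; i < 4; ++i)
        ok &= (fast.TK1[i] == slow.TK1[i]) && (fast.TK2[i] == slow.TK2[i]);
    return ok;  /* expected: 1 */
}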
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
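To make that nibble convention concrete: each 16-bit row packs four 4-bit cells with cell 0 in the most significant nibble of the big-endian word, which is why a rightRotate4_16() moves every cell one column to the right. A hypothetical accessor (not in the source) as a sketch:

#include <stdint.h>

/* Cell i of a ForkSkinny-64 row lives in nibble (3 - i): cell 0 is the
 * most significant nibble of the big-endian 16-bit word. */
static uint8_t skinny64_row_cell(uint16_t row, unsigned i)
{
    return (uint8_t)((row >> (12 - 4 * i)) & 0x0F);
}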
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
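Callers now rebuild this deleted per-round loop from the ranged primitives. A sketch of the calling pattern for the right branch, using the literal counts from the deleted macros (17 rounds before the fork, 23 after); the demo function name is hypothetical:

static void forkskinny_64_192_right_branch_demo(forkskinny_64_192_state_t *state)
{
    forkskinny_64_192_rounds(state, 0, 17);            /* rounds 0..16, before the fork */
    forkskinny_64_192_rounds(state, 17, 17 + 23);      /* rounds 17..39, right output */
    forkskinny_64_192_inv_rounds(state, 17 + 23, 17);  /* undoes the 23 "after" rounds */
}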
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
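Because the forward and reverse tweakey walks are constructed as exact inverses of one another, a cheap round-trip check falls out; a sketch with a hypothetical checker name, assuming the helpers from this file are in scope:

static int forkskinny_64_192_tk_roundtrip(forkskinny_64_192_state_t s, unsigned n)
{
    forkskinny_64_192_state_t t = s;
    unsigned i;
    int ok = 1;
    forkskinny_64_192_forward_tk(&t, n);
    forkskinny_64_192_reverse_tk(&t, n);  /* must undo the forward walk */
    for (i = 0; i < 4; ++i)
        ok &= (t.TK1[i] == s.TK1[i]) && (t.TK2[i] == s.TK2[i]) &&
              (t.TK3[i] == s.TK3[i]);
    return ok;  /* expected: 1 for any n */
}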
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/grain128.h b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/grain128.h index c8d6de9..ef16440 100644 --- a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/grain128.h +++ b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/grain128.h @@ -33,6 +33,8 @@ * combination of a 128-bit linear feedback shift register (LFSR) and a * 128-bit non-linear feedback shift register (NFSR). It is a member of * the Grain family of stream ciphers. 
+ * + * References: https://grain-128aead.github.io/ */ #ifdef __cplusplus diff --git a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128-avr.S b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128-avr.S new file mode 100644 index 0000000..007e486 --- /dev/null +++ b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128-avr.S @@ -0,0 +1,1947 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global grain128_core + .type grain128_core, @function +grain128_core: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r26,Z + ldd r27,Z+1 + ldd r28,Z+2 + ldd r29,Z+3 + ldd r2,Z+4 + ldd r3,Z+5 + ldd r4,Z+6 + ldd r5,Z+7 + ldd r6,Z+8 + ldd r7,Z+9 + ldd r8,Z+10 + ldd r9,Z+11 + ldd r10,Z+12 + ldd r11,Z+13 + ldd r12,Z+14 + ldd r13,Z+15 + eor r20,r26 + eor r21,r27 + eor r22,r28 + eor r23,r29 + mov r15,r26 + mov r24,r27 + mov r25,r28 + mov r1,r29 + mov r14,r5 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + eor r20,r14 + eor r21,r15 + eor r22,r24 + eor r23,r25 + mov r15,r2 + mov r24,r3 + mov r25,r4 + mov r1,r5 + mov r14,r9 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + eor r20,r14 + eor r21,r15 + eor r22,r24 + eor r23,r25 + mov r15,r6 + mov r24,r7 + mov r25,r8 + mov r1,r9 + mov r14,r13 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + eor r20,r14 + eor r21,r15 + eor r22,r24 + eor r23,r25 + mov r25,r6 + mov r1,r7 + mov r14,r11 + mov r15,r12 + mov r24,r13 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + eor r20,r15 + eor r21,r24 + eor r22,r25 + eor r23,r1 + eor r20,r10 + eor r21,r11 + eor r22,r12 + eor r23,r13 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r20 + std Z+13,r21 + std Z+14,r22 + std Z+15,r23 + eor r16,r26 + eor r17,r27 + eor r18,r28 + eor r19,r29 + ldd r26,Z+16 + ldd r27,Z+17 + ldd r28,Z+18 + ldd r29,Z+19 + ldd r2,Z+20 + ldd r3,Z+21 + ldd r4,Z+22 + ldd r5,Z+23 + ldd r6,Z+24 + ldd r7,Z+25 + ldd r8,Z+26 + ldd r9,Z+27 + ldd r10,Z+28 + ldd r11,Z+29 + ldd r12,Z+30 + ldd r13,Z+31 + eor r16,r26 + eor r17,r27 + eor r18,r28 + eor r19,r29 + mov r14,r26 + movw r20,r2 + movw r22,r4 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + eor r16,r7 + eor r17,r8 + eor r18,r9 + eor r19,r2 + mov r14,r6 + movw r20,r10 + movw r22,r12 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + eor r16,r10 + eor r17,r11 + eor r18,r12 + eor r19,r13 + mov r21,r26 + mov r22,r27 + mov r23,r28 + mov r14,r29 + mov r20,r5 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r25,r6 + mov r15,r7 + mov r1,r8 + mov r0,r9 + mov r24,r13 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r21,r25 + and r22,r15 + and r23,r1 + and r14,r0 + eor r16,r21 + eor r17,r22 + 
eor r18,r23 + eor r19,r14 + movw r22,r26 + mov r14,r28 + movw r20,r4 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r15,r26 + mov r1,r27 + mov r0,r28 + movw r24,r4 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + and r21,r24 + and r22,r25 + and r23,r15 + and r14,r1 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + mov r23,r26 + mov r14,r27 + mov r20,r3 + mov r21,r4 + mov r22,r5 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r1,r26 + mov r0,r27 + mov r24,r3 + mov r25,r4 + mov r15,r5 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r21,r25 + and r22,r15 + and r23,r1 + and r14,r0 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + mov r14,r26 + movw r20,r2 + movw r22,r4 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r0,r2 + movw r24,r6 + mov r15,r8 + mov r1,r9 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r21,r25 + and r22,r15 + and r23,r1 + and r14,r0 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + mov r20,r9 + mov r21,r2 + mov r22,r3 + mov r23,r4 + and r20,r8 + and r21,r9 + and r22,r2 + and r23,r3 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + mov r14,r2 + movw r20,r6 + movw r22,r8 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + mov r25,r6 + mov r15,r7 + mov r1,r8 + mov r0,r9 + mov r24,r13 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r20,r25 + and r21,r15 + and r22,r1 + and r23,r0 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + mov r21,r6 + mov r22,r7 + mov r23,r8 + mov r14,r9 + mov r20,r13 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r1,r6 + mov r0,r7 + mov r24,r11 + mov r25,r12 + mov r15,r13 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r21,r25 + and r22,r15 + and r23,r1 + and r14,r0 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + mov r23,r26 + mov r14,r27 + mov r20,r3 + mov r21,r4 + mov r22,r5 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + and r20,r3 + and r21,r4 + and r22,r5 + and r23,r26 + mov r0,r26 + movw r24,r2 + mov r15,r4 + mov r1,r5 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r20,r25 + and r21,r15 + and r22,r1 + and r23,r0 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + mov r21,r6 + mov r22,r7 + mov r23,r8 + mov r14,r9 + mov r20,r13 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + mov r15,r6 + mov r1,r7 + mov r0,r8 + movw r24,r12 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + and r20,r24 + and r21,r25 + and r22,r15 + and r23,r1 + mov r1,r6 + mov r0,r7 + mov r24,r11 + mov r25,r12 + mov r15,r13 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + 
rol r15 + rol r1 + rol r0 + and r20,r25 + and r21,r15 + and r22,r1 + and r23,r0 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + mov r20,r11 + mov r21,r12 + mov r22,r13 + mov r23,r6 + mov r1,r6 + movw r14,r10 + movw r24,r12 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + and r20,r15 + and r21,r24 + and r22,r25 + and r23,r1 + mov r1,r6 + movw r14,r10 + movw r24,r12 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + and r20,r14 + and r21,r15 + and r22,r24 + and r23,r25 + mov r1,r6 + movw r14,r10 + movw r24,r12 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + and r20,r14 + and r21,r15 + and r22,r24 + and r23,r25 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r16 + std Z+29,r17 + std Z+30,r18 + std Z+31,r19 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size grain128_core, .-grain128_core + + .text +.global grain128_preoutput + .type grain128_preoutput, @function +grain128_preoutput: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ldd r20,Z+16 + ldd r21,Z+17 + ldd r26,Z+18 + ldd r18,Z+22 + ldd r19,Z+23 + lsl r18 + rol r19 + rol r20 + rol r21 + rol r26 + lsl r18 + rol r19 + rol r20 + rol r21 + rol r26 + lsl r18 + rol r19 + rol r20 + rol r21 + rol r26 + lsl r18 + rol r19 + rol r20 + rol r21 + rol r26 + ldd r18,Z+27 + ldd r28,Z+28 + ldd r29,Z+29 + ldd r2,Z+30 + ldd r3,Z+31 + lsr r18 + ror r3 + ror r2 + ror r29 + ror r28 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + mov r23,r4 + mov r24,r5 + mov r25,r6 + mov r22,r11 + and r22,r19 + and r23,r20 + and r24,r21 + and r25,r26 + movw r14,r4 + mov r18,r6 + movw r12,r10 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + mov r1,r4 + mov r0,r5 + mov r16,r9 + mov r17,r10 + mov r27,r11 + lsl r16 + rol r17 + rol r27 + rol r1 + rol r0 + lsl r16 + rol r17 + rol r27 + rol r1 + rol r0 + lsl r16 + rol r17 + rol r27 + rol r1 + rol r0 + lsl r16 + rol r17 + rol r27 + rol r1 + rol r0 + and r12,r17 + and r13,r27 + and r14,r1 + and r15,r0 + eor r22,r12 + eor r23,r13 + eor r24,r14 + eor r25,r15 + ldd r4,Z+8 + ldd r5,Z+9 + ldd r6,Z+10 + ldd r7,Z+11 + movw r14,r8 + mov r18,r10 + movw r12,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + and r13,r28 + and r14,r29 + and r15,r2 + and r18,r3 + eor r22,r13 + eor r23,r14 + eor r24,r15 + eor r25,r18 + mov r18,r8 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + ldd r8,Z+12 + ldd r9,Z+13 + ldd r10,Z+14 + ldd r11,Z+15 + mov r27,r4 + mov r1,r5 + mov r0,r6 + movw r16,r10 + lsr r0 + 
ror r1 + ror r27 + ror r17 + ror r16 + and r13,r16 + and r14,r17 + and r15,r27 + and r18,r1 + eor r22,r13 + eor r23,r14 + eor r24,r15 + eor r25,r18 + and r19,r28 + and r20,r29 + and r21,r2 + and r26,r3 + mov r18,r4 + movw r12,r8 + movw r14,r10 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + and r19,r12 + and r20,r13 + and r21,r14 + and r26,r15 + eor r22,r19 + eor r23,r20 + eor r24,r21 + eor r25,r26 + mov r18,r4 + movw r12,r8 + movw r14,r10 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + eor r22,r12 + eor r23,r13 + eor r24,r14 + eor r25,r15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r26,Z+20 + ldd r27,Z+21 + ldd r28,Z+22 + ldd r29,Z+23 + ldd r4,Z+24 + ldd r5,Z+25 + ldd r6,Z+26 + ldd r7,Z+27 + ldd r8,Z+28 + ldd r9,Z+29 + ldd r10,Z+30 + ldd r11,Z+31 + mov r3,r18 + mov r12,r19 + mov r13,r20 + mov r14,r21 + mov r2,r29 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + eor r22,r3 + eor r23,r12 + eor r24,r13 + eor r25,r14 + movw r12,r18 + mov r14,r20 + movw r2,r28 + lsr r14 + ror r13 + ror r12 + ror r3 + ror r2 + eor r22,r2 + eor r23,r3 + eor r24,r12 + eor r25,r13 + mov r3,r26 + mov r12,r27 + mov r13,r28 + mov r14,r29 + mov r2,r7 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + eor r22,r3 + eor r23,r12 + eor r24,r13 + eor r25,r14 + movw r12,r26 + mov r14,r28 + movw r2,r6 + lsr r14 + ror r13 + ror r12 + ror r3 + ror r2 + lsr r14 + ror r13 + ror r12 + ror r3 + ror r2 + lsr r14 + ror r13 + ror r12 + ror r3 + ror r2 + eor r22,r2 + eor r23,r3 + eor r24,r12 + eor r25,r13 + eor r22,r4 + eor r23,r5 + eor r24,r6 + eor r25,r7 + movw r12,r4 + mov r14,r6 + movw r2,r10 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + eor r22,r3 + eor r23,r12 + eor r24,r13 + eor r25,r14 + mov r14,r4 + movw r2,r8 + movw r12,r10 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + eor r22,r3 + eor r23,r12 + eor r24,r13 + eor r25,r14 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size grain128_preoutput, .-grain128_preoutput + + .text +.global grain128_swap_word32 + .type grain128_swap_word32, @function +grain128_swap_word32: + movw r30,r24 +.L__stack_usage = 2 + ld r25,Z + ldd r24,Z+1 + ldd r23,Z+2 + ldd r22,Z+3 + mov r18,r22 + andi r18,85 + lsl r18 + lsr r22 + andi r22,85 + or r22,r18 + mov r18,r22 + andi r18,51 + lsl r18 + lsl r18 + lsr r22 + lsr r22 + andi r22,51 + or r22,r18 + swap r22 + mov r18,r23 + andi r18,85 + lsl r18 + lsr r23 + andi r23,85 + or r23,r18 + mov r18,r23 + andi r18,51 + lsl r18 + lsl r18 + lsr r23 + lsr r23 + andi r23,51 + or r23,r18 + swap r23 + mov r18,r24 + andi r18,85 + lsl r18 + lsr r24 + andi r24,85 + or r24,r18 + mov r18,r24 + andi r18,51 + lsl r18 + lsl r18 + lsr r24 + lsr r24 + andi r24,51 + or r24,r18 + swap r24 + mov r18,r25 + andi r18,85 + lsl r18 + lsr r25 + andi r25,85 + or r25,r18 + mov r18,r25 + andi r18,51 + lsl r18 + lsl r18 + lsr r25 + lsr r25 + andi r25,51 + or r25,r18 + swap r25 + ret + .size grain128_swap_word32, .-grain128_swap_word32 + + .text +.global grain128_compute_tag + .type grain128_compute_tag, @function +grain128_compute_tag: + movw r30,r24 +.L__stack_usage = 2 + ldd 
r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r0,Z+40 + eor r18,r0 + ldd r0,Z+41 + eor r19,r0 + ldd r0,Z+42 + eor r20,r0 + ldd r0,Z+43 + eor r21,r0 + ldd r0,Z+44 + eor r22,r0 + ldd r0,Z+45 + eor r23,r0 + ldd r0,Z+46 + eor r26,r0 + ldd r0,Z+47 + eor r27,r0 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + mov r24,r18 + andi r24,85 + lsl r24 + lsr r18 + andi r18,85 + or r18,r24 + mov r24,r18 + andi r24,51 + lsl r24 + lsl r24 + lsr r18 + lsr r18 + andi r18,51 + or r18,r24 + swap r18 + mov r24,r19 + andi r24,85 + lsl r24 + lsr r19 + andi r19,85 + or r19,r24 + mov r24,r19 + andi r24,51 + lsl r24 + lsl r24 + lsr r19 + lsr r19 + andi r19,51 + or r19,r24 + swap r19 + mov r24,r20 + andi r24,85 + lsl r24 + lsr r20 + andi r20,85 + or r20,r24 + mov r24,r20 + andi r24,51 + lsl r24 + lsl r24 + lsr r20 + lsr r20 + andi r20,51 + or r20,r24 + swap r20 + mov r24,r21 + andi r24,85 + lsl r24 + lsr r21 + andi r21,85 + or r21,r24 + mov r24,r21 + andi r24,51 + lsl r24 + lsl r24 + lsr r21 + lsr r21 + andi r21,51 + or r21,r24 + swap r21 + mov r24,r22 + andi r24,85 + lsl r24 + lsr r22 + andi r22,85 + or r22,r24 + mov r24,r22 + andi r24,51 + lsl r24 + lsl r24 + lsr r22 + lsr r22 + andi r22,51 + or r22,r24 + swap r22 + mov r24,r23 + andi r24,85 + lsl r24 + lsr r23 + andi r23,85 + or r23,r24 + mov r24,r23 + andi r24,51 + lsl r24 + lsl r24 + lsr r23 + lsr r23 + andi r23,51 + or r23,r24 + swap r23 + mov r24,r26 + andi r24,85 + lsl r24 + lsr r26 + andi r26,85 + or r26,r24 + mov r24,r26 + andi r24,51 + lsl r24 + lsl r24 + lsr r26 + lsr r26 + andi r26,51 + or r26,r24 + swap r26 + mov r24,r27 + andi r24,85 + lsl r24 + lsr r27 + andi r27,85 + or r27,r24 + mov r24,r27 + andi r24,51 + lsl r24 + lsl r24 + lsr r27 + lsr r27 + andi r27,51 + or r27,r24 + swap r27 + std Z+48,r27 + std Z+49,r26 + std Z+50,r23 + std Z+51,r22 + std Z+52,r21 + std Z+53,r20 + std Z+54,r19 + std Z+55,r18 + ret + .size grain128_compute_tag, .-grain128_compute_tag + + .text +.global grain128_interleave + .type grain128_interleave, @function +grain128_interleave: + movw r30,r24 +.L__stack_usage = 2 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + movw r22,r18 + movw r26,r20 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,17 + andi r23,17 + andi r26,17 + andi r27,17 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r0,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r0 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,3 + andi r23,3 + andi r26,3 + andi r27,3 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r0,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r0 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 
+ ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,15 + mov r23,r1 + andi r26,15 + mov r27,r1 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + st Z,r20 + std Z+1,r21 + std Z+2,r18 + std Z+3,r19 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + movw r22,r18 + movw r26,r20 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,17 + andi r23,17 + andi r26,17 + andi r27,17 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r0,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r0 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,3 + andi r23,3 + andi r26,3 + andi r27,3 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r0,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r0 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,15 + mov r23,r1 + andi r26,15 + mov r27,r1 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + std Z+4,r20 + std Z+5,r21 + std Z+6,r18 + std Z+7,r19 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + movw r22,r18 + movw r26,r20 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,17 + andi r23,17 + andi r26,17 + andi r27,17 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r0,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r0 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,3 + andi r23,3 + andi r26,3 + andi r27,3 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r0,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r0 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + 
mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,15 + mov r23,r1 + andi r26,15 + mov r27,r1 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + std Z+8,r20 + std Z+9,r21 + std Z+10,r18 + std Z+11,r19 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + movw r22,r18 + movw r26,r20 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,17 + andi r23,17 + andi r26,17 + andi r27,17 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r0,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r0 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,3 + andi r23,3 + andi r26,3 + andi r27,3 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r0,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r0 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,15 + mov r23,r1 + andi r26,15 + mov r27,r1 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + std Z+12,r20 + std Z+13,r21 + std Z+14,r18 + std Z+15,r19 + ret + .size grain128_interleave, .-grain128_interleave + +#endif diff --git a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.c b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.c index d0d71ea..c98376d 100644 --- a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.c +++ b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.c @@ -26,14 +26,9 @@ #define GWORD(a, b, start_bit) \ (((a) << ((start_bit) % 32)) ^ ((b) >> (32 - ((start_bit) % 32)))) -/** - * \brief Performs 32 rounds of Grain-128 in parallel. - * - * \param state Grain-128 state. - * \param x 32 bits of input to be incorporated into the LFSR state, or zero. - * \param x2 Another 32 bits to be incorporated into the NFSR state, or zero. 
- */ -static void grain128_core +#if !defined(__AVR__) + +void grain128_core (grain128_state_t *state, uint32_t x, uint32_t x2) { uint32_t s0, s1, s2, s3; @@ -67,7 +62,7 @@ static void grain128_core /* Perform the NFSR feedback algorithm from the specification: * * b'[i] = b[i + 1] - * b'[127] = s'[127] ^ b[0] ^ b[26] ^ b[56] ^ b[91] ^ b[96] + * b'[127] = s[0] ^ b[0] ^ b[26] ^ b[56] ^ b[91] ^ b[96] * ^ (b[3] & b[67]) ^ (b[11] & b[13]) ^ (b[17] & b[18]) * ^ (b[27] & b[59]) ^ (b[40] & b[48]) ^ (b[61] & b[65]) * ^ (b[68] & b[84]) ^ (b[22] & b[24] & b[25]) @@ -106,14 +101,19 @@ static void grain128_core state->nfsr[3] = x2; } -/** - * \brief Generates 32 bits of pre-output data. - * - * \param state Grain-128 state. - * - * \return The generated 32 bits of pre-output data. - */ -static uint32_t grain128_preoutput(const grain128_state_t *state) +#define grain128_preoutput grain128_preoutput_inner +#define grain128_preoutput_setup(state) grain128_preoutput((state)) + +#else /* __AVR__ */ + +/* For some reason, the AVR assembly preoutput doesn't work for key setup + * but does work everywhere else. Investigate and fix this later. */ +uint32_t grain128_preoutput(const grain128_state_t *state); +#define grain128_preoutput_setup(state) grain128_preoutput_inner((state)) + +#endif /* __AVR__ */ + +uint32_t grain128_preoutput_inner(const grain128_state_t *state) { uint32_t s0, s1, s2, s3; uint32_t b0, b1, b2, b3; @@ -170,12 +170,37 @@ static uint32_t grain128_preoutput(const grain128_state_t *state) (_y) = (((_y) & (mask)) << (shift)) | (((_y) >> (shift)) & (mask)); \ } while (0) +#if defined(__AVR__) +#define GRAIN128_ASM_HELPERS 1 +#endif + +#if defined(GRAIN128_ASM_HELPERS) + +/** + * \brief Loads a 32-bit word and swaps it from big-endian bit order + * into little-endian bit order. + * + * \param data Points to the word to be loaded. + * \return Little-endian version of the 32-bit word at \a data. + */ +uint32_t grain128_swap_word32(const unsigned char *data); + +/** + * \brief Interleaves the bits in a 16-byte keystream block to separate + * out the even and odd bits. + * + * \param ks Points to the keystream block. 
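 + *
 + * In Grain-128AEAD the pre-output stream alternates keystream bits (even
 + * positions) and authenticator bits (odd positions); separating them here
 + * lets the encryption and authentication code consume the two streams
 + * directly.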
+ */ +void grain128_interleave(unsigned char *ks); + +#endif + void grain128_setup (grain128_state_t *state, const unsigned char *key, const unsigned char *nonce) { uint32_t k[4]; - unsigned round; + uint8_t round; /* Internally, the Grain-128 stream cipher uses big endian bit * order, but the Grain-128AEAD specification for NIST uses little @@ -187,26 +212,33 @@ void grain128_setup * P = [7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8 * 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24] */ + #if defined(GRAIN128_ASM_HELPERS) #define SWAP_BITS(out, in) \ do { \ - uint32_t tmp = (in); \ + (out) = grain128_swap_word32((in)); \ + } while (0) + #else + #define SWAP_BITS(out, in) \ + do { \ + uint32_t tmp = be_load_word32((in)); \ bit_permute_step_simple(tmp, 0x55555555, 1); \ bit_permute_step_simple(tmp, 0x33333333, 2); \ bit_permute_step_simple(tmp, 0x0f0f0f0f, 4); \ (out) = tmp; \ } while (0) + #endif /* Initialize the LFSR state with the nonce and padding */ - SWAP_BITS(state->lfsr[0], be_load_word32(nonce)); - SWAP_BITS(state->lfsr[1], be_load_word32(nonce + 4)); - SWAP_BITS(state->lfsr[2], be_load_word32(nonce + 8)); + SWAP_BITS(state->lfsr[0], nonce); + SWAP_BITS(state->lfsr[1], nonce + 4); + SWAP_BITS(state->lfsr[2], nonce + 8); state->lfsr[3] = 0xFFFFFFFEU; /* pad with all-1s and a terminating 0 */ /* Initialize the NFSR state with the key */ - SWAP_BITS(k[0], be_load_word32(key)); - SWAP_BITS(k[1], be_load_word32(key + 4)); - SWAP_BITS(k[2], be_load_word32(key + 8)); - SWAP_BITS(k[3], be_load_word32(key + 12)); + SWAP_BITS(k[0], key); + SWAP_BITS(k[1], key + 4); + SWAP_BITS(k[2], key + 8); + SWAP_BITS(k[3], key + 12); state->nfsr[0] = k[0]; state->nfsr[1] = k[1]; state->nfsr[2] = k[2]; @@ -215,7 +247,7 @@ void grain128_setup /* Perform 256 rounds of Grain-128 to mix up the initial state. 
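 * Each iteration feeds the 32 pre-output bits back into both the LFSR
 * and the NFSR (the two "y" arguments to grain128_core() below).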
* The rounds can be performed 32 at a time: 32 * 8 = 256 */ for (round = 0; round < 8; ++round) { - uint32_t y = grain128_preoutput(state); + uint32_t y = grain128_preoutput_setup(state); grain128_core(state, y, y); } @@ -241,6 +273,7 @@ void grain128_setup */ static void grain128_next_keystream(grain128_state_t *state) { +#if !defined(GRAIN128_ASM_HELPERS) unsigned posn; for (posn = 0; posn < sizeof(state->ks); posn += 4) { /* Get the next word of pre-output and run the Grain-128 core */ @@ -264,6 +297,16 @@ static void grain128_next_keystream(grain128_state_t *state) bit_permute_step_simple(x, 0x00ff00ff, 8); be_store_word32(state->ks + posn, x); } +#else + /* Generate the data and then perform the interleaving */ + unsigned posn; + for (posn = 0; posn < sizeof(state->ks); posn += 4) { + uint32_t x = grain128_preoutput(state); + le_store_word32(state->ks + posn, x); + grain128_core(state, 0, 0); + } + grain128_interleave(state->ks); +#endif } void grain128_authenticate @@ -394,6 +437,8 @@ void grain128_decrypt state->posn = posn; } +#if !defined(__AVR__) + void grain128_compute_tag(grain128_state_t *state) { uint64_t x; @@ -409,3 +454,5 @@ void grain128_compute_tag(grain128_state_t *state) bit_permute_step_simple(x, 0x0f0f0f0f0f0f0f0fULL, 4); be_store_word64(state->ks, x); } + +#endif /* !__AVR__ */ diff --git a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.h b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.h index 4c3a6e4..ba1d260 100644 --- a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.h +++ b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.h @@ -28,6 +28,8 @@ /** * \file internal-grain128.h * \brief Internal implementation of the Grain-128 stream cipher. + * + * References: https://grain-128aead.github.io/ */ #ifdef __cplusplus @@ -52,6 +54,25 @@ typedef struct } grain128_state_t; /** + * \brief Performs 32 rounds of Grain-128 in parallel. + * + * \param state Grain-128 state. + * \param x 32 bits of input to be incorporated into the LFSR state, or zero. + * \param x2 Another 32 bits to be incorporated into the NFSR state, or zero. + */ +void grain128_core + (grain128_state_t *state, uint32_t x, uint32_t x2); + +/** + * \brief Generates 32 bits of pre-output data. + * + * \param state Grain-128 state. + * + * \return The generated 32 bits of pre-output data. + */ +uint32_t grain128_preoutput(const grain128_state_t *state); + +/** * \brief Sets up the initial Grain-128 state with the key and nonce. * * \param state Grain-128 state to be initialized. 
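As an aside on SWAP_BITS() above: its three bit_permute_step_simple() steps implement the byte-wise bit reversal P shown in the comment. A minimal standalone C sketch of the same transform (illustrative only; the function name is invented and is not part of the library):

#include <stdint.h>

/* Reverse the bit order within each byte of a word: swap adjacent bits,
 * then adjacent 2-bit pairs, then nibbles - the same three masked steps
 * that SWAP_BITS() performs. */
static uint32_t reverse_bits_in_bytes(uint32_t x)
{
    x = ((x & 0x55555555U) << 1) | ((x >> 1) & 0x55555555U);
    x = ((x & 0x33333333U) << 2) | ((x >> 2) & 0x33333333U);
    x = ((x & 0x0F0F0F0FU) << 4) | ((x >> 4) & 0x0F0F0F0FU);
    return x; /* e.g. 0x01020304 becomes 0x8040C020 */
}

Combined with the byte swap of be_load_word32() (or the AVR grain128_swap_word32() helper), this converts between the big-endian bit order used internally and the little-endian bit order of the NIST specification.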
diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/encrypt.c b/hyena/Implementations/crypto_aead/hyenav1/rhys/encrypt.c index db50784..9db7825 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/encrypt.c +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/encrypt.c @@ -9,7 +9,7 @@ int crypto_aead_encrypt const unsigned char *npub, const unsigned char *k) { - return hyena_aead_encrypt + return hyena_v1_aead_encrypt (c, clen, m, mlen, ad, adlen, nsec, npub, k); } @@ -21,6 +21,6 @@ int crypto_aead_decrypt const unsigned char *npub, const unsigned char *k) { - return hyena_aead_decrypt + return hyena_v1_aead_decrypt (m, mlen, nsec, c, clen, ad, adlen, npub, k); } diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.c b/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.c index 3af79fa..eaafb36 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.c +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.c @@ -25,14 +25,24 @@ #include "internal-util.h" #include -aead_cipher_t const hyena_cipher = { - "HYENA", +aead_cipher_t const hyena_v1_cipher = { + "HYENA-v1", HYENA_KEY_SIZE, HYENA_NONCE_SIZE, HYENA_TAG_SIZE, AEAD_FLAG_LITTLE_ENDIAN, - hyena_aead_encrypt, - hyena_aead_decrypt + hyena_v1_aead_encrypt, + hyena_v1_aead_decrypt +}; + +aead_cipher_t const hyena_v2_cipher = { + "HYENA-v2", + HYENA_KEY_SIZE, + HYENA_NONCE_SIZE, + HYENA_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + hyena_v2_aead_encrypt, + hyena_v2_aead_decrypt }; /** @@ -52,7 +62,24 @@ static void hyena_double_delta(unsigned char D[8]) } /** - * \brief Process the associated data for HYENA. + * \brief Triples a delta value in the F(2^64) field. + * + * \param D The delta value to be tripled. + * + * D' = D ^ (D << 1) if the top-most bit is 0, or D' = D ^ (D << 1) ^ 0x1B + * otherwise. + */ +static void hyena_triple_delta(unsigned char D[8]) +{ + unsigned index; + unsigned char mask = (unsigned char)(((signed char)(D[0])) >> 7); + for (index = 0; index < 7; ++index) + D[index] ^= (D[index] << 1) | (D[index + 1] >> 7); + D[7] ^= (D[7] << 1) ^ (mask & 0x1B); +} + +/** + * \brief Process the associated data for HYENA-v1. * * \param ks Key schedule for the GIFT-128 cipher. * \param Y Internal hash state of HYENA. @@ -60,7 +87,7 @@ static void hyena_double_delta(unsigned char D[8]) * \param ad Points to the associated data. * \param adlen Length of the associated data in bytes. 
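 * (Y is the 16-byte feedback state; D is the 8-byte delta mask derived
 * from the encrypted nonce in the functions below.)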
*/ -static void hyena_process_ad +static void hyena_v1_process_ad (const gift128n_key_schedule_t *ks, unsigned char Y[16], unsigned char D[8], const unsigned char *ad, unsigned long long adlen) @@ -97,7 +124,7 @@ static void hyena_process_ad } } -int hyena_aead_encrypt +int hyena_v1_aead_encrypt (unsigned char *c, unsigned long long *clen, const unsigned char *m, unsigned long long mlen, const unsigned char *ad, unsigned long long adlen, @@ -116,8 +143,7 @@ int hyena_aead_encrypt *clen = mlen + HYENA_TAG_SIZE; /* Set up the key schedule and use it to encrypt the nonce */ - if (!gift128n_init(&ks, k, HYENA_KEY_SIZE)) - return -1; + gift128n_init(&ks, k); Y[0] = 0; if (adlen == 0) Y[0] |= 0x01; @@ -131,7 +157,7 @@ int hyena_aead_encrypt memcpy(D, Y + 8, 8); /* Process the associated data */ - hyena_process_ad(&ks, Y, D, ad, adlen); + hyena_v1_process_ad(&ks, Y, D, ad, adlen); /* Encrypt the plaintext to produce the ciphertext */ if (mlen > 0) { @@ -185,7 +211,7 @@ int hyena_aead_encrypt return 0; } -int hyena_aead_decrypt +int hyena_v1_aead_decrypt (unsigned char *m, unsigned long long *mlen, unsigned char *nsec, const unsigned char *c, unsigned long long clen, @@ -207,8 +233,7 @@ int hyena_aead_decrypt *mlen = clen - HYENA_TAG_SIZE; /* Set up the key schedule and use it to encrypt the nonce */ - if (!gift128n_init(&ks, k, HYENA_KEY_SIZE)) - return -1; + gift128n_init(&ks, k); Y[0] = 0; if (adlen == 0) Y[0] |= 0x01; @@ -222,7 +247,7 @@ int hyena_aead_decrypt memcpy(D, Y + 8, 8); /* Process the associated data */ - hyena_process_ad(&ks, Y, D, ad, adlen); + hyena_v1_process_ad(&ks, Y, D, ad, adlen); /* Decrypt the ciphertext to produce the plaintext */ clen -= HYENA_TAG_SIZE; @@ -281,3 +306,227 @@ int hyena_aead_decrypt gift128n_encrypt(&ks, Y, Y); return aead_check_tag(mtemp, *mlen, Y, c, HYENA_TAG_SIZE); } + +/** + * \brief Process the associated data for HYENA-v2. + * + * \param ks Key schedule for the GIFT-128 cipher. + * \param Y Internal hash state of HYENA. + * \param D Internal hash state of HYENA. + * \param ad Points to the associated data. + * \param adlen Length of the associated data in bytes. 
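+ *
+ * Relative to v1, the final block is masked with the new triple-delta
+ * operation: D is tripled once for a full final block and twice for a
+ * partial one, which domain-separates the two cases (see the code below).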
+ */ +static void hyena_v2_process_ad + (const gift128n_key_schedule_t *ks, unsigned char Y[16], + unsigned char D[8], const unsigned char *ad, + unsigned long long adlen) +{ + unsigned char feedback[16]; + while (adlen > 16) { + hyena_double_delta(D); + memcpy(feedback, ad, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + gift128n_encrypt(ks, Y, Y); + ad += 16; + adlen -= 16; + } + if (adlen == 16) { + hyena_triple_delta(D); + memcpy(feedback, ad, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + } else { + unsigned temp = (unsigned)adlen; + hyena_triple_delta(D); + hyena_triple_delta(D); + memcpy(feedback, ad, temp); + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + if (temp > 8) + lw_xor_block(feedback + 8, Y + 8, temp - 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + } +} + +int hyena_v2_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + gift128n_key_schedule_t ks; + unsigned char Y[16]; + unsigned char D[8]; + unsigned char feedback[16]; + unsigned index; + (void)nsec; + + /* Set the length of the returned ciphertext */ + *clen = mlen + HYENA_TAG_SIZE; + + /* Set up the key schedule and use it to encrypt the nonce */ + gift128n_init(&ks, k); + Y[0] = 0; + if (adlen == 0) + Y[0] |= 0x01; + if (adlen == 0 && mlen == 0) + Y[0] |= 0x02; + Y[1] = 0; + Y[2] = 0; + Y[3] = 0; + memcpy(Y + 4, npub, HYENA_NONCE_SIZE); + gift128n_encrypt(&ks, Y, Y); + memcpy(D, Y + 8, 8); + + /* Process the associated data */ + hyena_v2_process_ad(&ks, Y, D, ad, adlen); + + /* Encrypt the plaintext to produce the ciphertext */ + if (mlen > 0) { + while (mlen > 16) { + gift128n_encrypt(&ks, Y, Y); + hyena_double_delta(D); + memcpy(feedback, m, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, 16); + lw_xor_block(Y, feedback, 16); + c += 16; + m += 16; + mlen -= 16; + } + gift128n_encrypt(&ks, Y, Y); + if (mlen == 16) { + hyena_triple_delta(D); + memcpy(feedback, m, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, 16); + lw_xor_block(Y, feedback, 16); + c += 16; + } else { + unsigned temp = (unsigned)mlen; + hyena_triple_delta(D); + hyena_triple_delta(D); + memcpy(feedback, m, temp); + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + if (temp > 8) + lw_xor_block(feedback + 8, Y + 8, temp - 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, temp); + lw_xor_block(Y, feedback, 16); + c += temp; + } + } + + /* Swap the two halves of Y and generate the authentication tag */ + for (index = 0; index < 8; ++index) { + unsigned char temp1 = Y[index]; + unsigned char temp2 = Y[index + 8]; + Y[index] = temp2; + Y[index + 8] = temp1; + } + gift128n_encrypt(&ks, c, Y); + return 0; +} + +int hyena_v2_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + gift128n_key_schedule_t ks; + unsigned char Y[16]; + unsigned char D[8]; + unsigned char feedback[16]; + unsigned char *mtemp; + unsigned index; + 
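+ /* mtemp keeps the start of the plaintext buffer for the final tag check */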
(void)nsec; + + /* Validate the ciphertext length and set the return "mlen" value */ + if (clen < HYENA_TAG_SIZE) + return -1; + *mlen = clen - HYENA_TAG_SIZE; + + /* Set up the key schedule and use it to encrypt the nonce */ + gift128n_init(&ks, k); + Y[0] = 0; + if (adlen == 0) + Y[0] |= 0x01; + if (adlen == 0 && clen == HYENA_TAG_SIZE) + Y[0] |= 0x02; + Y[1] = 0; + Y[2] = 0; + Y[3] = 0; + memcpy(Y + 4, npub, HYENA_NONCE_SIZE); + gift128n_encrypt(&ks, Y, Y); + memcpy(D, Y + 8, 8); + + /* Process the associated data */ + hyena_v2_process_ad(&ks, Y, D, ad, adlen); + + /* Decrypt the ciphertext to produce the plaintext */ + clen -= HYENA_TAG_SIZE; + mtemp = m; + if (clen > 0) { + while (clen > 16) { + gift128n_encrypt(&ks, Y, Y); + hyena_double_delta(D); + memcpy(feedback + 8, c + 8, 8); + lw_xor_block_2_src(m, c, Y, 16); + memcpy(feedback, m, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += 16; + m += 16; + clen -= 16; + } + gift128n_encrypt(&ks, Y, Y); + if (clen == 16) { + hyena_triple_delta(D); + memcpy(feedback + 8, c + 8, 8); + lw_xor_block_2_src(m, c, Y, 16); + memcpy(feedback, m, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += 16; + } else { + unsigned temp = (unsigned)clen; + hyena_triple_delta(D); + hyena_triple_delta(D); + if (temp > 8) { + memcpy(feedback + 8, c + 8, temp - 8); + lw_xor_block_2_src(m, c, Y, temp); + memcpy(feedback, m, 8); + } else { + lw_xor_block_2_src(m, c, Y, temp); + memcpy(feedback, m, temp); + } + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += temp; + } + } + + /* Swap the two halves of Y and check the authentication tag */ + for (index = 0; index < 8; ++index) { + unsigned char temp1 = Y[index]; + unsigned char temp2 = Y[index + 8]; + Y[index] = temp2; + Y[index + 8] = temp1; + } + gift128n_encrypt(&ks, Y, Y); + return aead_check_tag(mtemp, *mlen, Y, c, HYENA_TAG_SIZE); +} diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.h b/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.h index ee9bb9c..79585d5 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.h +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.h @@ -33,6 +33,12 @@ * GIFT-128 block cipher. The algorithm has a 128-bit key, a 96-bit nonce, * and a 128-bit authentication tag. * + * This library implements both the v1 and v2 versions of HYENA from the + * authors. The v1 version was submitted to the second round of the + * NIST Lightweight Cryptography Competition but was later found to be + * vulnerable to a forgery attack. The authors fixed this in v2, but it + * was too late to submit the update for the second round. + * * References: https://www.isical.ac.in/~lightweight/hyena/ */ @@ -56,12 +62,76 @@ extern "C" { #define HYENA_NONCE_SIZE 12 /** - * \brief Meta-information block for the HYENA cipher. + * \brief Meta-information block for the HYENA-v1 cipher. + */ +extern aead_cipher_t const hyena_v1_cipher; + +/** + * \brief Meta-information block for the HYENA-v2 cipher. + */ +extern aead_cipher_t const hyena_v2_cipher; + +/** + * \brief Encrypts and authenticates a packet with HYENA-v1. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the 16 byte authentication tag. + * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes.
+ * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param nsec Secret nonce - not used by this algorithm. + * \param npub Points to the public nonce for the packet which must + * be 12 bytes in length. + * \param k Points to the 16 bytes of the key to use to encrypt the packet. + * + * \return 0 on success, or a negative value if there was an error in + * the parameters. + * + * \sa hyena_v1_aead_decrypt() */ -extern aead_cipher_t const hyena_cipher; +int hyena_v1_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Decrypts and authenticates a packet with HYENA-v1. + * + * \param m Buffer to receive the plaintext message on output. + * \param mlen Receives the length of the plaintext message on output. + * \param nsec Secret nonce - not used by this algorithm. + * \param c Buffer that contains the ciphertext and authentication + * tag to decrypt. + * \param clen Length of the input data in bytes, which includes the + * ciphertext and the 16 byte authentication tag. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param npub Points to the public nonce for the packet which must + * be 12 bytes in length. + * \param k Points to the 16 bytes of the key to use to decrypt the packet. + * + * \return 0 on success, -1 if the authentication tag was incorrect, + * or some other negative number if there was an error in the parameters. + * + * \sa hyena_v1_aead_encrypt() + */ +int hyena_v1_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); /** - * \brief Encrypts and authenticates a packet with HYENA. + * \brief Encrypts and authenticates a packet with HYENA-v2. * * \param c Buffer to receive the output. * \param clen On exit, set to the length of the output which includes @@ -81,7 +151,7 @@ extern aead_cipher_t const hyena_cipher; * * \sa hyena_aead_decrypt() */ -int hyena_aead_encrypt +int hyena_v2_aead_encrypt (unsigned char *c, unsigned long long *clen, const unsigned char *m, unsigned long long mlen, const unsigned char *ad, unsigned long long adlen, @@ -90,7 +160,7 @@ int hyena_aead_encrypt const unsigned char *k); /** - * \brief Decrypts and authenticates a packet with HYENA. + * \brief Decrypts and authenticates a packet with HYENA-v2. * * \param m Buffer to receive the plaintext message on output. * \param mlen Receives the length of the plaintext message on output.
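Since the two generations share the same signature, a minimal usage sketch for the v2 pair (illustrative only: the key, nonce, and data values are invented, and the buffer sizes follow the constants in this header):

#include <stddef.h>
#include "hyena.h"

/* Round-trip a short message through HYENA-v2. Returns 0 on success or
 * -1 if the authentication tag fails to verify. */
static int hyena_v2_roundtrip(void)
{
    static const unsigned char key[HYENA_KEY_SIZE] = {0};
    static const unsigned char npub[HYENA_NONCE_SIZE] = {0};
    static const unsigned char ad[4] = {'h', 'd', 'r', '0'};
    unsigned char m[4] = {'t', 'e', 's', 't'};
    unsigned char c[sizeof(m) + HYENA_TAG_SIZE]; /* ciphertext plus tag */
    unsigned char p[sizeof(m)];
    unsigned long long clen, plen;

    hyena_v2_aead_encrypt(c, &clen, m, sizeof(m), ad, sizeof(ad),
                          NULL, npub, key);
    return hyena_v2_aead_decrypt(p, &plen, NULL, c, clen, ad, sizeof(ad),
                                 npub, key);
}

The nsec arguments are passed as NULL because HYENA does not use a secret nonce.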
@@ -111,7 +181,7 @@ int hyena_aead_encrypt * * \sa hyena_aead_encrypt() */ -int hyena_aead_decrypt +int hyena_v2_aead_decrypt (unsigned char *m, unsigned long long *mlen, unsigned char *nsec, const unsigned char *c, unsigned long long clen, diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.c b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.c index 681dbc8..c6ac5ec 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.c +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.c @@ -23,8 +23,12 @@ #include "internal-gift128.h" #include "internal-util.h" +#if !GIFT128_VARIANT_ASM + +#if GIFT128_VARIANT != GIFT128_VARIANT_TINY + /* Round constants for GIFT-128 in the fixsliced representation */ -static uint32_t const GIFT128_RC[40] = { +static uint32_t const GIFT128_RC_fixsliced[40] = { 0x10000008, 0x80018000, 0x54000002, 0x01010181, 0x8000001f, 0x10888880, 0x6001e000, 0x51500002, 0x03030180, 0x8000002f, 0x10088880, 0x60016000, 0x41500002, 0x03030080, 0x80000027, 0x10008880, 0x4001e000, 0x11500002, @@ -34,6 +38,246 @@ static uint32_t const GIFT128_RC[40] = { 0xc001a000, 0x14500002, 0x01020181, 0x8000001a }; +#endif + +#if GIFT128_VARIANT != GIFT128_VARIANT_FULL + +/* Round constants for GIFT-128 in the bitsliced representation */ +static uint8_t const GIFT128_RC[40] = { + 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3E, 0x3D, 0x3B, + 0x37, 0x2F, 0x1E, 0x3C, 0x39, 0x33, 0x27, 0x0E, + 0x1D, 0x3A, 0x35, 0x2B, 0x16, 0x2C, 0x18, 0x30, + 0x21, 0x02, 0x05, 0x0B, 0x17, 0x2E, 0x1C, 0x38, + 0x31, 0x23, 0x06, 0x0D, 0x1B, 0x36, 0x2D, 0x1A +}; + +#endif + +/* http://programming.sirrida.de/perm_fn.html#bit_permute_step */ +#define bit_permute_step(_y, mask, shift) \ + do { \ + uint32_t y = (_y); \ + uint32_t t = ((y >> (shift)) ^ y) & (mask); \ + (_y) = (y ^ t) ^ (t << (shift)); \ + } while (0) + +/* + * The permutation below was generated by the online permutation generator at + * "http://programming.sirrida.de/calcperm.php". + * + * All of the permutations are essentially the same, except that each is + * rotated by 8 bits with respect to the next: + * + * P0: 0 24 16 8 1 25 17 9 2 26 18 10 3 27 19 11 4 28 20 12 5 29 21 13 6 30 22 14 7 31 23 15 + * P1: 8 0 24 16 9 1 25 17 10 2 26 18 11 3 27 19 12 4 28 20 13 5 29 21 14 6 30 22 15 7 31 23 + * P2: 16 8 0 24 17 9 1 25 18 10 2 26 19 11 3 27 20 12 4 28 21 13 5 29 22 14 6 30 23 15 7 31 + * P3: 24 16 8 0 25 17 9 1 26 18 10 2 27 19 11 3 28 20 12 4 29 21 13 5 30 22 14 6 31 23 15 7 + * + * The most efficient permutation from the online generator was P3, so we + * perform it as the core of the others, and then perform a final rotation. + * + * It is possible to do slightly better than "P3 then rotate" on desktop and + * server architectures for the other permutations. But the advantage isn't + * as evident on embedded platforms so we keep things simple.
+ */ +#define PERM3_INNER(x) \ + do { \ + bit_permute_step(x, 0x0a0a0a0a, 3); \ + bit_permute_step(x, 0x00cc00cc, 6); \ + bit_permute_step(x, 0x0000f0f0, 12); \ + bit_permute_step(x, 0x000000ff, 24); \ + } while (0) +#define PERM0(x) \ + do { \ + uint32_t _x = (x); \ + PERM3_INNER(_x); \ + (x) = leftRotate8(_x); \ + } while (0) +#define PERM1(x) \ + do { \ + uint32_t _x = (x); \ + PERM3_INNER(_x); \ + (x) = leftRotate16(_x); \ + } while (0) +#define PERM2(x) \ + do { \ + uint32_t _x = (x); \ + PERM3_INNER(_x); \ + (x) = leftRotate24(_x); \ + } while (0) +#define PERM3(x) \ + do { \ + uint32_t _x = (x); \ + PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) + +#define INV_PERM3_INNER(x) \ + do { \ + bit_permute_step(x, 0x00550055, 9); \ + bit_permute_step(x, 0x00003333, 18); \ + bit_permute_step(x, 0x000f000f, 12); \ + bit_permute_step(x, 0x000000ff, 24); \ + } while (0) +#define INV_PERM0(x) \ + do { \ + uint32_t _x = rightRotate8(x); \ + INV_PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) +#define INV_PERM1(x) \ + do { \ + uint32_t _x = rightRotate16(x); \ + INV_PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) +#define INV_PERM2(x) \ + do { \ + uint32_t _x = rightRotate24(x); \ + INV_PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) +#define INV_PERM3(x) \ + do { \ + uint32_t _x = (x); \ + INV_PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) + +/** + * \brief Converts the GIFT-128 nibble-based representation into word-based. + * + * \param output Output buffer to write the word-based version to. + * \param input Input buffer to read the nibble-based version from. + * + * The \a input and \a output buffers can be the same buffer. + */ +static void gift128n_to_words + (unsigned char *output, const unsigned char *input) +{ + uint32_t s0, s1, s2, s3; + + /* Load the input buffer into 32-bit words. We use the nibble order + * from the HYENA submission to NIST which is byte-reversed with respect + * to the nibble order of the original GIFT-128 paper. Nibble zero is in + * the first byte instead of the last, which means little-endian order. */ + s0 = le_load_word32(input + 12); + s1 = le_load_word32(input + 8); + s2 = le_load_word32(input + 4); + s3 = le_load_word32(input); + + /* Rearrange the bits so that bits 0..3 of each nibble are + * scattered to bytes 0..3 of each word. The permutation is: + * + * 0 8 16 24 1 9 17 25 2 10 18 26 3 11 19 27 4 12 20 28 5 13 21 29 6 14 22 30 7 15 23 31 + * + * Generated with "http://programming.sirrida.de/calcperm.php". + */ + #define PERM_WORDS(_x) \ + do { \ + uint32_t x = (_x); \ + bit_permute_step(x, 0x0a0a0a0a, 3); \ + bit_permute_step(x, 0x00cc00cc, 6); \ + bit_permute_step(x, 0x0000f0f0, 12); \ + bit_permute_step(x, 0x0000ff00, 8); \ + (_x) = x; \ + } while (0) + PERM_WORDS(s0); + PERM_WORDS(s1); + PERM_WORDS(s2); + PERM_WORDS(s3); + + /* Rearrange the bytes and write them to the output buffer */ + output[0] = (uint8_t)s0; + output[1] = (uint8_t)s1; + output[2] = (uint8_t)s2; + output[3] = (uint8_t)s3; + output[4] = (uint8_t)(s0 >> 8); + output[5] = (uint8_t)(s1 >> 8); + output[6] = (uint8_t)(s2 >> 8); + output[7] = (uint8_t)(s3 >> 8); + output[8] = (uint8_t)(s0 >> 16); + output[9] = (uint8_t)(s1 >> 16); + output[10] = (uint8_t)(s2 >> 16); + output[11] = (uint8_t)(s3 >> 16); + output[12] = (uint8_t)(s0 >> 24); + output[13] = (uint8_t)(s1 >> 24); + output[14] = (uint8_t)(s2 >> 24); + output[15] = (uint8_t)(s3 >> 24); +} + +/** + * \brief Converts the GIFT-128 word-based representation into nibble-based. 
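+ * This is the exact inverse of gift128n_to_words() above.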
+ * + * \param output Output buffer to write the nibble-based version to. + * \param input Input buffer to read the word-based version from. + */ +static void gift128n_to_nibbles + (unsigned char *output, const unsigned char *input) +{ + uint32_t s0, s1, s2, s3; + + /* Load the input bytes and rearrange them so that s0 contains the + * most significant nibbles and s3 contains the least significant */ + s0 = (((uint32_t)(input[12])) << 24) | + (((uint32_t)(input[8])) << 16) | + (((uint32_t)(input[4])) << 8) | + ((uint32_t)(input[0])); + s1 = (((uint32_t)(input[13])) << 24) | + (((uint32_t)(input[9])) << 16) | + (((uint32_t)(input[5])) << 8) | + ((uint32_t)(input[1])); + s2 = (((uint32_t)(input[14])) << 24) | + (((uint32_t)(input[10])) << 16) | + (((uint32_t)(input[6])) << 8) | + ((uint32_t)(input[2])); + s3 = (((uint32_t)(input[15])) << 24) | + (((uint32_t)(input[11])) << 16) | + (((uint32_t)(input[7])) << 8) | + ((uint32_t)(input[3])); + + /* Apply the inverse of PERM_WORDS() from the function above */ + #define INV_PERM_WORDS(_x) \ + do { \ + uint32_t x = (_x); \ + bit_permute_step(x, 0x00aa00aa, 7); \ + bit_permute_step(x, 0x0000cccc, 14); \ + bit_permute_step(x, 0x00f000f0, 4); \ + bit_permute_step(x, 0x0000ff00, 8); \ + (_x) = x; \ + } while (0) + INV_PERM_WORDS(s0); + INV_PERM_WORDS(s1); + INV_PERM_WORDS(s2); + INV_PERM_WORDS(s3); + + /* Store the result into the output buffer as 32-bit words */ + le_store_word32(output + 12, s0); + le_store_word32(output + 8, s1); + le_store_word32(output + 4, s2); + le_store_word32(output, s3); +} + +void gift128n_encrypt + (const gift128n_key_schedule_t *ks, unsigned char *output, + const unsigned char *input) +{ + gift128n_to_words(output, input); + gift128b_encrypt(ks, output, output); + gift128n_to_nibbles(output, output); +} + +void gift128n_decrypt + (const gift128n_key_schedule_t *ks, unsigned char *output, + const unsigned char *input) +{ + gift128n_to_words(output, input); + gift128b_decrypt(ks, output, output); + gift128n_to_nibbles(output, output); +} + +#if GIFT128_VARIANT != GIFT128_VARIANT_TINY + /** * \brief Swaps bits within two words. 
* @@ -202,21 +446,27 @@ static void gift128b_compute_round_keys /* Keys 8, 9, 18, and 19 do not need any adjustment */ } +#if GIFT128_VARIANT == GIFT128_VARIANT_FULL /* Derive the fixsliced keys for the remaining rounds 11..40 */ for (index = 20; index < 80; index += 10) { gift128b_derive_keys(ks->k + index, ks->k + index - 20); } +#endif } -int gift128b_init - (gift128b_key_schedule_t *ks, const unsigned char *key, size_t key_len) +void gift128b_init(gift128b_key_schedule_t *ks, const unsigned char *key) { - if (!ks || !key || key_len != 16) - return 0; gift128b_compute_round_keys (ks, be_load_word32(key), be_load_word32(key + 4), be_load_word32(key + 8), be_load_word32(key + 12)); - return 1; +} + +void gift128n_init(gift128n_key_schedule_t *ks, const unsigned char *key) +{ + /* Use the little-endian key byte order from the HYENA submission */ + gift128b_compute_round_keys + (ks, le_load_word32(key + 12), le_load_word32(key + 8), + le_load_word32(key + 4), le_load_word32(key)); } /** @@ -521,11 +771,37 @@ int gift128b_init gift128b_inv_sbox(s3, s1, s2, s0); \ } while (0) +#else /* GIFT128_VARIANT_TINY */ + +void gift128b_init(gift128b_key_schedule_t *ks, const unsigned char *key) +{ + /* Mirror the fixslicing word order of 3, 1, 2, 0 */ + ks->k[0] = be_load_word32(key + 12); + ks->k[1] = be_load_word32(key + 4); + ks->k[2] = be_load_word32(key + 8); + ks->k[3] = be_load_word32(key); +} + +void gift128n_init(gift128n_key_schedule_t *ks, const unsigned char *key) +{ + /* Use the little-endian key byte order from the HYENA submission + * and mirror the fixslicing word order of 3, 1, 2, 0 */ + ks->k[0] = le_load_word32(key); + ks->k[1] = le_load_word32(key + 8); + ks->k[2] = le_load_word32(key + 4); + ks->k[3] = le_load_word32(key + 12); +} + +#endif /* GIFT128_VARIANT_TINY */ + +#if GIFT128_VARIANT == GIFT128_VARIANT_SMALL + void gift128b_encrypt (const gift128b_key_schedule_t *ks, unsigned char *output, const unsigned char *input) { uint32_t s0, s1, s2, s3; + uint32_t k[20]; /* Copy the plaintext into the state buffer and convert from big endian */ s0 = be_load_word32(input); @@ -534,14 +810,20 @@ void gift128b_encrypt s3 = be_load_word32(input + 12); /* Perform all 40 rounds five at a time using the fixsliced method */ - gift128b_encrypt_5_rounds(ks->k, GIFT128_RC); - gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_derive_keys(k, ks->k); + gift128b_derive_keys(k + 10, ks->k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 10); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 15); + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 20); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 25); + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 30); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 35); /* Pack the state into the ciphertext buffer in big endian */ be_store_word32(output, s0); @@ -555,6 +837,7 @@ void 
gift128b_encrypt_preloaded const uint32_t input[4]) { uint32_t s0, s1, s2, s3; + uint32_t k[20]; /* Copy the plaintext into local variables */ s0 = input[0]; @@ -563,14 +846,20 @@ void gift128b_encrypt_preloaded s3 = input[3]; /* Perform all 40 rounds five at a time using the fixsliced method */ - gift128b_encrypt_5_rounds(ks->k, GIFT128_RC); - gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_derive_keys(k, ks->k); + gift128b_derive_keys(k + 10, ks->k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 10); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 15); + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 20); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 25); + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 30); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 35); /* Pack the state into the ciphertext buffer */ output[0] = s0; @@ -579,7 +868,55 @@ void gift128b_encrypt_preloaded output[3] = s3; } -void gift128b_decrypt +void gift128t_encrypt + (const gift128n_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, uint32_t tweak) +{ + uint32_t s0, s1, s2, s3; + uint32_t k[20]; + + /* Copy the plaintext into the state buffer and convert from nibbles */ + gift128n_to_words(output, input); + s0 = be_load_word32(output); + s1 = be_load_word32(output + 4); + s2 = be_load_word32(output + 8); + s3 = be_load_word32(output + 12); + + /* Perform all 40 rounds five at a time using the fixsliced method. 
+ * Every 5 rounds except the last we add the tweak value to the state */ + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + s0 ^= tweak; + gift128b_derive_keys(k, ks->k); + gift128b_derive_keys(k + 10, ks->k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 10); + s0 ^= tweak; + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 15); + s0 ^= tweak; + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 20); + s0 ^= tweak; + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 25); + s0 ^= tweak; + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 30); + s0 ^= tweak; + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 35); + + /* Pack the state into the ciphertext buffer in nibble form */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); + gift128n_to_nibbles(output, output); +} + +#elif GIFT128_VARIANT == GIFT128_VARIANT_FULL + +void gift128b_encrypt (const gift128b_key_schedule_t *ks, unsigned char *output, const unsigned char *input) { @@ -592,14 +929,14 @@ void gift128b_decrypt s3 = be_load_word32(input + 12); /* Perform all 40 rounds five at a time using the fixsliced method */ - gift128b_decrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); - gift128b_decrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - gift128b_decrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - gift128b_decrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - gift128b_decrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - gift128b_decrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - gift128b_decrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - gift128b_decrypt_5_rounds(ks->k, GIFT128_RC); + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC_fixsliced + 15); + gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); /* Pack the state into the ciphertext buffer in big endian */ be_store_word32(output, s0); @@ -608,173 +945,308 @@ void gift128b_decrypt be_store_word32(output + 12, s3); } -int gift128n_init - (gift128n_key_schedule_t *ks, const unsigned char *key, size_t key_len) +void gift128b_encrypt_preloaded + (const gift128b_key_schedule_t *ks, uint32_t output[4], + const uint32_t input[4]) { - /* Use the little-endian key byte order from the HYENA submission */ - if (!ks || !key || key_len != 16) - return 0; - gift128b_compute_round_keys - (ks, le_load_word32(key + 12), le_load_word32(key + 8), - le_load_word32(key + 4), le_load_word32(key)); - return 1; + uint32_t s0, s1, s2, s3; + + /* Copy the plaintext into local variables */ + s0 = input[0]; + s1 = input[1]; + s2 = input[2]; + s3 = input[3]; + + /* Perform all 40 rounds five at a time using the fixsliced method */ + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + gift128b_encrypt_5_rounds(ks->k + 30, 
GIFT128_RC_fixsliced + 15); + gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); + + /* Pack the state into the ciphertext buffer */ + output[0] = s0; + output[1] = s1; + output[2] = s2; + output[3] = s3; } -/* http://programming.sirrida.de/perm_fn.html#bit_permute_step */ -#define bit_permute_step(_y, mask, shift) \ - do { \ - uint32_t y = (_y); \ - uint32_t t = ((y >> (shift)) ^ y) & (mask); \ - (_y) = (y ^ t) ^ (t << (shift)); \ - } while (0) +void gift128t_encrypt + (const gift128n_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, uint32_t tweak) +{ + uint32_t s0, s1, s2, s3; -/** - * \brief Converts the GIFT-128 nibble-based representation into word-based. - * - * \param output Output buffer to write the word-based version to. - * \param input Input buffer to read the nibble-based version from. - * - * The \a input and \a output buffers can be the same buffer. - */ -static void gift128n_to_words - (unsigned char *output, const unsigned char *input) + /* Copy the plaintext into the state buffer and convert from nibbles */ + gift128n_to_words(output, input); + s0 = be_load_word32(output); + s1 = be_load_word32(output + 4); + s2 = be_load_word32(output + 8); + s3 = be_load_word32(output + 12); + + /* Perform all 40 rounds five at a time using the fixsliced method. + * Every 5 rounds except the last we add the tweak value to the state */ + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC_fixsliced + 15); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); + + /* Pack the state into the ciphertext buffer in nibble form */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); + gift128n_to_nibbles(output, output); +} + +#else /* GIFT128_VARIANT_TINY */ + +void gift128b_encrypt + (const gift128b_key_schedule_t *ks, unsigned char *output, + const unsigned char *input) { uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; - /* Load the input buffer into 32-bit words. We use the nibble order - * from the HYENA submission to NIST which is byte-reversed with respect - * to the nibble order of the original GIFT-128 paper. Nibble zero is in - * the first byte instead of the last, which means little-endian order. */ - s0 = le_load_word32(input + 12); - s1 = le_load_word32(input + 8); - s2 = le_load_word32(input + 4); - s3 = le_load_word32(input); + /* Copy the plaintext into the state buffer and convert from big endian */ + s0 = be_load_word32(input); + s1 = be_load_word32(input + 4); + s2 = be_load_word32(input + 8); + s3 = be_load_word32(input + 12); - /* Rearrange the bits so that bits 0..3 of each nibble are - * scattered to bytes 0..3 of each word. 
The permutation is: - * - * 0 8 16 24 1 9 17 25 2 10 18 26 3 11 19 27 4 12 20 28 5 13 21 29 6 14 22 30 7 15 23 31 - * - * Generated with "http://programming.sirrida.de/calcperm.php". - */ - #define PERM_WORDS(_x) \ - do { \ - uint32_t x = (_x); \ - bit_permute_step(x, 0x0a0a0a0a, 3); \ - bit_permute_step(x, 0x00cc00cc, 6); \ - bit_permute_step(x, 0x0000f0f0, 12); \ - bit_permute_step(x, 0x0000ff00, 8); \ - (_x) = x; \ - } while (0) - PERM_WORDS(s0); - PERM_WORDS(s1); - PERM_WORDS(s2); - PERM_WORDS(s3); + /* The key schedule is initialized with the key itself */ + w0 = ks->k[3]; + w1 = ks->k[1]; + w2 = ks->k[2]; + w3 = ks->k[0]; + + /* Perform all 40 rounds */ + for (round = 0; round < 40; ++round) { + /* SubCells - apply the S-box */ + s1 ^= s0 & s2; + s0 ^= s1 & s3; + s2 ^= s0 | s1; + s3 ^= s2; + s1 ^= s3; + s3 ^= 0xFFFFFFFFU; + s2 ^= s0 & s1; + temp = s0; + s0 = s3; + s3 = temp; + + /* PermBits - apply the 128-bit permutation */ + PERM0(s0); + PERM1(s1); + PERM2(s2); + PERM3(s3); + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round]; + + /* Rotate the key schedule */ + temp = w3; + w3 = w2; + w2 = w1; + w1 = w0; + w0 = ((temp & 0xFFFC0000U) >> 2) | ((temp & 0x00030000U) << 14) | + ((temp & 0x00000FFFU) << 4) | ((temp & 0x0000F000U) >> 12); + } - /* Rearrange the bytes and write them to the output buffer */ - output[0] = (uint8_t)s0; - output[1] = (uint8_t)s1; - output[2] = (uint8_t)s2; - output[3] = (uint8_t)s3; - output[4] = (uint8_t)(s0 >> 8); - output[5] = (uint8_t)(s1 >> 8); - output[6] = (uint8_t)(s2 >> 8); - output[7] = (uint8_t)(s3 >> 8); - output[8] = (uint8_t)(s0 >> 16); - output[9] = (uint8_t)(s1 >> 16); - output[10] = (uint8_t)(s2 >> 16); - output[11] = (uint8_t)(s3 >> 16); - output[12] = (uint8_t)(s0 >> 24); - output[13] = (uint8_t)(s1 >> 24); - output[14] = (uint8_t)(s2 >> 24); - output[15] = (uint8_t)(s3 >> 24); + /* Pack the state into the ciphertext buffer in big endian */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); } -/** - * \brief Converts the GIFT-128 word-based representation into nibble-based. - * - * \param output Output buffer to write the nibble-based version to. - * \param input Input buffer to read the word-based version from. 
- */ -static void gift128n_to_nibbles - (unsigned char *output, const unsigned char *input) +void gift128b_encrypt_preloaded + (const gift128b_key_schedule_t *ks, uint32_t output[4], + const uint32_t input[4]) { uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; - /* Load the input bytes and rearrange them so that s0 contains the - * most significant nibbles and s3 contains the least significant */ - s0 = (((uint32_t)(input[12])) << 24) | - (((uint32_t)(input[8])) << 16) | - (((uint32_t)(input[4])) << 8) | - ((uint32_t)(input[0])); - s1 = (((uint32_t)(input[13])) << 24) | - (((uint32_t)(input[9])) << 16) | - (((uint32_t)(input[5])) << 8) | - ((uint32_t)(input[1])); - s2 = (((uint32_t)(input[14])) << 24) | - (((uint32_t)(input[10])) << 16) | - (((uint32_t)(input[6])) << 8) | - ((uint32_t)(input[2])); - s3 = (((uint32_t)(input[15])) << 24) | - (((uint32_t)(input[11])) << 16) | - (((uint32_t)(input[7])) << 8) | - ((uint32_t)(input[3])); + /* Copy the plaintext into the state buffer */ + s0 = input[0]; + s1 = input[1]; + s2 = input[2]; + s3 = input[3]; - /* Apply the inverse of PERM_WORDS() from the function above */ - #define INV_PERM_WORDS(_x) \ - do { \ - uint32_t x = (_x); \ - bit_permute_step(x, 0x00aa00aa, 7); \ - bit_permute_step(x, 0x0000cccc, 14); \ - bit_permute_step(x, 0x00f000f0, 4); \ - bit_permute_step(x, 0x0000ff00, 8); \ - (_x) = x; \ - } while (0) - INV_PERM_WORDS(s0); - INV_PERM_WORDS(s1); - INV_PERM_WORDS(s2); - INV_PERM_WORDS(s3); + /* The key schedule is initialized with the key itself */ + w0 = ks->k[3]; + w1 = ks->k[1]; + w2 = ks->k[2]; + w3 = ks->k[0]; + + /* Perform all 40 rounds */ + for (round = 0; round < 40; ++round) { + /* SubCells - apply the S-box */ + s1 ^= s0 & s2; + s0 ^= s1 & s3; + s2 ^= s0 | s1; + s3 ^= s2; + s1 ^= s3; + s3 ^= 0xFFFFFFFFU; + s2 ^= s0 & s1; + temp = s0; + s0 = s3; + s3 = temp; + + /* PermBits - apply the 128-bit permutation */ + PERM0(s0); + PERM1(s1); + PERM2(s2); + PERM3(s3); + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round]; + + /* Rotate the key schedule */ + temp = w3; + w3 = w2; + w2 = w1; + w1 = w0; + w0 = ((temp & 0xFFFC0000U) >> 2) | ((temp & 0x00030000U) << 14) | + ((temp & 0x00000FFFU) << 4) | ((temp & 0x0000F000U) >> 12); + } - /* Store the result into the output buffer as 32-bit words */ - le_store_word32(output + 12, s0); - le_store_word32(output + 8, s1); - le_store_word32(output + 4, s2); - le_store_word32(output, s3); + /* Pack the state into the ciphertext buffer */ + output[0] = s0; + output[1] = s1; + output[2] = s2; + output[3] = s3; } -void gift128n_encrypt +void gift128t_encrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input) + const unsigned char *input, uint32_t tweak) { + uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; + + /* Copy the plaintext into the state buffer and convert from nibbles */ gift128n_to_words(output, input); - gift128b_encrypt(ks, output, output); + s0 = be_load_word32(output); + s1 = be_load_word32(output + 4); + s2 = be_load_word32(output + 8); + s3 = be_load_word32(output + 12); + + /* The key schedule is initialized with the key itself */ + w0 = ks->k[3]; + w1 = ks->k[1]; + w2 = ks->k[2]; + w3 = ks->k[0]; + + /* Perform all 40 rounds */ + for (round = 0; round < 40; ++round) { + /* SubCells - apply the S-box */ + s1 ^= s0 & s2; + s0 ^= s1 & s3; + s2 ^= s0 | s1; + s3 ^= s2; + s1 ^= s3; + s3 ^= 
0xFFFFFFFFU; + s2 ^= s0 & s1; + temp = s0; + s0 = s3; + s3 = temp; + + /* PermBits - apply the 128-bit permutation */ + PERM0(s0); + PERM1(s1); + PERM2(s2); + PERM3(s3); + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round]; + + /* AddTweak - XOR in the tweak every 5 rounds except the last */ + if (((round + 1) % 5) == 0 && round < 39) + s0 ^= tweak; + + /* Rotate the key schedule */ + temp = w3; + w3 = w2; + w2 = w1; + w1 = w0; + w0 = ((temp & 0xFFFC0000U) >> 2) | ((temp & 0x00030000U) << 14) | + ((temp & 0x00000FFFU) << 4) | ((temp & 0x0000F000U) >> 12); + } + + /* Pack the state into the ciphertext buffer in nibble form */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); gift128n_to_nibbles(output, output); } -void gift128n_decrypt - (const gift128n_key_schedule_t *ks, unsigned char *output, +#endif /* GIFT128_VARIANT_TINY */ + +#if GIFT128_VARIANT == GIFT128_VARIANT_FULL + +void gift128b_decrypt + (const gift128b_key_schedule_t *ks, unsigned char *output, const unsigned char *input) { - gift128n_to_words(output, input); - gift128b_decrypt(ks, output, output); - gift128n_to_nibbles(output, output); -} + uint32_t s0, s1, s2, s3; -/* 4-bit tweak values expanded to 32-bit */ -static uint32_t const GIFT128_tweaks[16] = { - 0x00000000, 0xe1e1e1e1, 0xd2d2d2d2, 0x33333333, - 0xb4b4b4b4, 0x55555555, 0x66666666, 0x87878787, - 0x78787878, 0x99999999, 0xaaaaaaaa, 0x4b4b4b4b, - 0xcccccccc, 0x2d2d2d2d, 0x1e1e1e1e, 0xffffffff -}; + /* Copy the plaintext into the state buffer and convert from big endian */ + s0 = be_load_word32(input); + s1 = be_load_word32(input + 4); + s2 = be_load_word32(input + 8); + s3 = be_load_word32(input + 12); -void gift128t_encrypt + /* Perform all 40 rounds five at a time using the fixsliced method */ + gift128b_decrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); + gift128b_decrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + gift128b_decrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + gift128b_decrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + gift128b_decrypt_5_rounds(ks->k + 30, GIFT128_RC_fixsliced + 15); + gift128b_decrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + gift128b_decrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_decrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + + /* Pack the state into the ciphertext buffer in big endian */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); +} + +void gift128t_decrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input, unsigned char tweak) + const unsigned char *input, uint32_t tweak) { - uint32_t s0, s1, s2, s3, tword; + uint32_t s0, s1, s2, s3; - /* Copy the plaintext into the state buffer and convert from nibbles */ + /* Copy the ciphertext into the state buffer and convert from nibbles */ gift128n_to_words(output, input); s0 = be_load_word32(output); s1 = be_load_word32(output + 4); @@ -782,25 +1254,24 @@ void gift128t_encrypt s3 = be_load_word32(output + 12); /* Perform all 40 rounds five at a time using the fixsliced method. 
- * Every 5 rounds except the last we add the tweak value to the state */ - tword = GIFT128_tweaks[tweak]; - gift128b_encrypt_5_rounds(ks->k, GIFT128_RC); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); + * Every 5 rounds except the first we add the tweak value to the state */ + gift128b_decrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 30, GIFT128_RC_fixsliced + 15); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); - /* Pack the state into the ciphertext buffer in nibble form */ + /* Pack the state into the plaintext buffer in nibble form */ be_store_word32(output, s0); be_store_word32(output + 4, s1); be_store_word32(output + 8, s2); @@ -808,37 +1279,211 @@ void gift128t_encrypt gift128n_to_nibbles(output, output); } +#else /* GIFT128_VARIANT_SMALL || GIFT128_VARIANT_TINY */ + +/* The small variant uses fixslicing for encryption, but we need to change + * to bitslicing for decryption because of the difficulty of fast-forwarding + * the fixsliced key schedule to the end. So the tiny variant is used for + * decryption when the small variant is selected. Since the NIST AEAD modes + * for GIFT-128 only use the block encrypt operation, the inefficiencies + * in decryption don't matter all that much */ + +/** + * \def gift128b_load_and_forward_schedule() + * \brief Generate the decryption key at the end of the last round. + * + * To do that, we run the block operation forward to determine the + * final state of the key schedule after the last round: + * + * w0 = ks->k[0]; + * w1 = ks->k[1]; + * w2 = ks->k[2]; + * w3 = ks->k[3]; + * for (round = 0; round < 40; ++round) { + * temp = w3; + * w3 = w2; + * w2 = w1; + * w1 = w0; + * w0 = ((temp & 0xFFFC0000U) >> 2) | ((temp & 0x00030000U) << 14) | + * ((temp & 0x00000FFFU) << 4) | ((temp & 0x0000F000U) >> 12); + * } + * + * We can short-cut all of the above by noticing that we don't need + * to do the word rotations. Every 4 rounds, the rotation alignment + * returns to the original position and each word has been rotated + * by applying the "2 right and 4 left" bit-rotation step to it. + * We then repeat that 10 times for the full 40 rounds. The overall + * effect is to apply a "20 right and 40 left" bit-rotation to every + * word in the key schedule. That is equivalent to "4 right and 8 left" + * on the 16-bit sub-words. 
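+ *
+ * As an illustrative cross-check of that claim (a sketch, not part of
+ * the library), applying the per-pass bit-rotation ten times to a
+ * single word w gives the same result as the closed form used below:
+ *
+ *     uint32_t slow = w, fast = w;
+ *     int r;
+ *     for (r = 0; r < 10; ++r)   // one bit-rotation per 4 rounds
+ *         slow = ((slow & 0xFFFC0000U) >> 2) | ((slow & 0x00030000U) << 14) |
+ *                ((slow & 0x00000FFFU) << 4) | ((slow & 0x0000F000U) >> 12);
+ *     fast = ((fast & 0xFFF00000U) >> 4) | ((fast & 0x000F0000U) << 12) |
+ *            ((fast & 0x000000FFU) << 8) | ((fast & 0x0000FF00U) >> 8);
+ *     // slow == fast for every w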
+ */ +#if GIFT128_VARIANT != GIFT128_VARIANT_SMALL +#define gift128b_load_and_forward_schedule() \ + do { \ + w0 = ks->k[3]; \ + w1 = ks->k[1]; \ + w2 = ks->k[2]; \ + w3 = ks->k[0]; \ + w0 = ((w0 & 0xFFF00000U) >> 4) | ((w0 & 0x000F0000U) << 12) | \ + ((w0 & 0x000000FFU) << 8) | ((w0 & 0x0000FF00U) >> 8); \ + w1 = ((w1 & 0xFFF00000U) >> 4) | ((w1 & 0x000F0000U) << 12) | \ + ((w1 & 0x000000FFU) << 8) | ((w1 & 0x0000FF00U) >> 8); \ + w2 = ((w2 & 0xFFF00000U) >> 4) | ((w2 & 0x000F0000U) << 12) | \ + ((w2 & 0x000000FFU) << 8) | ((w2 & 0x0000FF00U) >> 8); \ + w3 = ((w3 & 0xFFF00000U) >> 4) | ((w3 & 0x000F0000U) << 12) | \ + ((w3 & 0x000000FFU) << 8) | ((w3 & 0x0000FF00U) >> 8); \ + } while (0) +#else +/* The small variant needs to also undo some of the rotations that were + * done to generate the fixsliced version of the key schedule */ +#define gift128b_load_and_forward_schedule() \ + do { \ + w0 = ks->k[3]; \ + w1 = ks->k[1]; \ + w2 = ks->k[2]; \ + w3 = ks->k[0]; \ + gift128b_swap_move(w3, w3, 0x000000FFU, 24); \ + gift128b_swap_move(w3, w3, 0x00003333U, 18); \ + gift128b_swap_move(w3, w3, 0x000F000FU, 12); \ + gift128b_swap_move(w3, w3, 0x00550055U, 9); \ + gift128b_swap_move(w1, w1, 0x000000FFU, 24); \ + gift128b_swap_move(w1, w1, 0x00003333U, 18); \ + gift128b_swap_move(w1, w1, 0x000F000FU, 12); \ + gift128b_swap_move(w1, w1, 0x00550055U, 9); \ + gift128b_swap_move(w2, w2, 0x000000FFU, 24); \ + gift128b_swap_move(w2, w2, 0x000F000FU, 12); \ + gift128b_swap_move(w2, w2, 0x03030303U, 6); \ + gift128b_swap_move(w2, w2, 0x11111111U, 3); \ + gift128b_swap_move(w0, w0, 0x000000FFU, 24); \ + gift128b_swap_move(w0, w0, 0x000F000FU, 12); \ + gift128b_swap_move(w0, w0, 0x03030303U, 6); \ + gift128b_swap_move(w0, w0, 0x11111111U, 3); \ + w0 = ((w0 & 0xFFF00000U) >> 4) | ((w0 & 0x000F0000U) << 12) | \ + ((w0 & 0x000000FFU) << 8) | ((w0 & 0x0000FF00U) >> 8); \ + w1 = ((w1 & 0xFFF00000U) >> 4) | ((w1 & 0x000F0000U) << 12) | \ + ((w1 & 0x000000FFU) << 8) | ((w1 & 0x0000FF00U) >> 8); \ + w2 = ((w2 & 0xFFF00000U) >> 4) | ((w2 & 0x000F0000U) << 12) | \ + ((w2 & 0x000000FFU) << 8) | ((w2 & 0x0000FF00U) >> 8); \ + w3 = ((w3 & 0xFFF00000U) >> 4) | ((w3 & 0x000F0000U) << 12) | \ + ((w3 & 0x000000FFU) << 8) | ((w3 & 0x0000FF00U) >> 8); \ + } while (0) +#endif + +void gift128b_decrypt + (const gift128b_key_schedule_t *ks, unsigned char *output, + const unsigned char *input) +{ + uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; + + /* Copy the ciphertext into the state buffer and convert from big endian */ + s0 = be_load_word32(input); + s1 = be_load_word32(input + 4); + s2 = be_load_word32(input + 8); + s3 = be_load_word32(input + 12); + + /* Generate the decryption key at the end of the last round */ + gift128b_load_and_forward_schedule(); + + /* Perform all 40 rounds */ + for (round = 40; round > 0; --round) { + /* Rotate the key schedule backwards */ + temp = w0; + w0 = w1; + w1 = w2; + w2 = w3; + w3 = ((temp & 0x3FFF0000U) << 2) | ((temp & 0xC0000000U) >> 14) | + ((temp & 0x0000FFF0U) >> 4) | ((temp & 0x0000000FU) << 12); + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round - 1]; + + /* InvPermBits - apply the inverse of the 128-bit permutation */ + INV_PERM0(s0); + INV_PERM1(s1); + INV_PERM2(s2); + INV_PERM3(s3); + + /* InvSubCells - apply the inverse of the S-box */ + temp = s0; + s0 = s3; + s3 = temp; + s2 ^= s0 & s1; + s3 ^= 0xFFFFFFFFU; + s1 ^= s3; + s3 ^= s2; + s2 ^= s0 | s1; + s0 ^= s1 
& s3; + s1 ^= s0 & s2; + } + + /* Pack the state into the plaintext buffer in big endian */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); +} + void gift128t_decrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input, unsigned char tweak) + const unsigned char *input, uint32_t tweak) { - uint32_t s0, s1, s2, s3, tword; + uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; - /* Copy the ciphertext into the state buffer and convert from nibbles */ + /* Copy the plaintext into the state buffer and convert from nibbles */ gift128n_to_words(output, input); s0 = be_load_word32(output); s1 = be_load_word32(output + 4); s2 = be_load_word32(output + 8); s3 = be_load_word32(output + 12); - /* Perform all 40 rounds five at a time using the fixsliced method. - * Every 5 rounds except the first we add the tweak value to the state */ - tword = GIFT128_tweaks[tweak]; - gift128b_decrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k, GIFT128_RC); + /* Generate the decryption key at the end of the last round */ + gift128b_load_and_forward_schedule(); + + /* Perform all 40 rounds */ + for (round = 40; round > 0; --round) { + /* Rotate the key schedule backwards */ + temp = w0; + w0 = w1; + w1 = w2; + w2 = w3; + w3 = ((temp & 0x3FFF0000U) << 2) | ((temp & 0xC0000000U) >> 14) | + ((temp & 0x0000FFF0U) >> 4) | ((temp & 0x0000000FU) << 12); + + /* AddTweak - XOR in the tweak every 5 rounds except the last */ + if ((round % 5) == 0 && round < 40) + s0 ^= tweak; + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round - 1]; + + /* InvPermBits - apply the inverse of the 128-bit permutation */ + INV_PERM0(s0); + INV_PERM1(s1); + INV_PERM2(s2); + INV_PERM3(s3); + + /* InvSubCells - apply the inverse of the S-box */ + temp = s0; + s0 = s3; + s3 = temp; + s2 ^= s0 & s1; + s3 ^= 0xFFFFFFFFU; + s1 ^= s3; + s3 ^= s2; + s2 ^= s0 | s1; + s0 ^= s1 & s3; + s1 ^= s0 & s2; + } /* Pack the state into the plaintext buffer in nibble form */ be_store_word32(output, s0); @@ -847,3 +1492,7 @@ void gift128t_decrypt be_store_word32(output + 12, s3); gift128n_to_nibbles(output, output); } + +#endif /* GIFT128_VARIANT_SMALL || GIFT128_VARIANT_TINY */ + +#endif /* !GIFT128_VARIANT_ASM */ diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.h b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.h index 1ac40e5..f57d143 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.h +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.h @@ -47,11 +47,13 @@ * in any of the NIST submissions so we don't bother with it in this library. 
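 *
 * The variant that gets compiled is selected through the new
 * internal-gift128-config.h header; as an illustration (the value shown
 * here is only an example, not the default):
 *
 *     #define GIFT128_VARIANT GIFT128_VARIANT_SMALL
 *
 * which shrinks the precomputed key schedule from 80 words to 20.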
 *
 * References: https://eprint.iacr.org/2017/622.pdf,
+ * https://eprint.iacr.org/2020/412.pdf,
 * https://giftcipher.github.io/gift/
 */

#include <stddef.h>
#include <stdint.h>
+#include "internal-gift128-config.h"

#ifdef __cplusplus
extern "C" {
@@ -63,16 +65,23 @@ extern "C" {
#define GIFT128_BLOCK_SIZE 16

/**
- * \brief Number of round keys for the fixsliced representation of GIFT-128.
+ * \var GIFT128_ROUND_KEYS
+ * \brief Number of round keys for the GIFT-128 key schedule.
 */
+#if GIFT128_VARIANT == GIFT128_VARIANT_TINY
+#define GIFT128_ROUND_KEYS 4
+#elif GIFT128_VARIANT == GIFT128_VARIANT_SMALL
+#define GIFT128_ROUND_KEYS 20
+#else
#define GIFT128_ROUND_KEYS 80
+#endif

/**
 * \brief Structure of the key schedule for GIFT-128 (bit-sliced).
 */
typedef struct
{
-    /** Pre-computed round keys in the fixsliced form */
+    /** Pre-computed round keys for bit-sliced GIFT-128 */
    uint32_t k[GIFT128_ROUND_KEYS];

} gift128b_key_schedule_t;

@@ -81,14 +90,9 @@ typedef struct
 * \brief Initializes the key schedule for GIFT-128 (bit-sliced).
 *
 * \param ks Points to the key schedule to initialize.
- * \param key Points to the key data.
- * \param key_len Length of the key data, which must be 16.
- *
- * \return Non-zero on success or zero if there is something wrong
- * with the parameters.
+ * \param key Points to the 16 bytes of the key data.
 */
-int gift128b_init
-    (gift128b_key_schedule_t *ks, const unsigned char *key, size_t key_len);
+void gift128b_init(gift128b_key_schedule_t *ks, const unsigned char *key);

/**
 * \brief Encrypts a 128-bit block with GIFT-128 (bit-sliced).
@@ -145,14 +149,9 @@ typedef gift128b_key_schedule_t gift128n_key_schedule_t;
 * \brief Initializes the key schedule for GIFT-128 (nibble-based).
 *
 * \param ks Points to the key schedule to initialize.
- * \param key Points to the key data.
- * \param key_len Length of the key data, which must be 16.
- *
- * \return Non-zero on success or zero if there is something wrong
- * with the parameters.
+ * \param key Points to the 16 bytes of the key data.
 */
-int gift128n_init
-    (gift128n_key_schedule_t *ks, const unsigned char *key, size_t key_len);
+void gift128n_init(gift128n_key_schedule_t *ks, const unsigned char *key);

/**
 * \brief Encrypts a 128-bit block with GIFT-128 (nibble-based).
@@ -182,13 +181,31 @@ void gift128n_decrypt (const gift128n_key_schedule_t *ks, unsigned char *output, const unsigned char *input); +/* 4-bit tweak values expanded to 32-bit for TweGIFT-128 */ +#define GIFT128T_TWEAK_0 0x00000000 /**< TweGIFT-128 tweak value 0 */ +#define GIFT128T_TWEAK_1 0xe1e1e1e1 /**< TweGIFT-128 tweak value 1 */ +#define GIFT128T_TWEAK_2 0xd2d2d2d2 /**< TweGIFT-128 tweak value 2 */ +#define GIFT128T_TWEAK_3 0x33333333 /**< TweGIFT-128 tweak value 3 */ +#define GIFT128T_TWEAK_4 0xb4b4b4b4 /**< TweGIFT-128 tweak value 4 */ +#define GIFT128T_TWEAK_5 0x55555555 /**< TweGIFT-128 tweak value 5 */ +#define GIFT128T_TWEAK_6 0x66666666 /**< TweGIFT-128 tweak value 6 */ +#define GIFT128T_TWEAK_7 0x87878787 /**< TweGIFT-128 tweak value 7 */ +#define GIFT128T_TWEAK_8 0x78787878 /**< TweGIFT-128 tweak value 8 */ +#define GIFT128T_TWEAK_9 0x99999999 /**< TweGIFT-128 tweak value 9 */ +#define GIFT128T_TWEAK_10 0xaaaaaaaa /**< TweGIFT-128 tweak value 10 */ +#define GIFT128T_TWEAK_11 0x4b4b4b4b /**< TweGIFT-128 tweak value 11 */ +#define GIFT128T_TWEAK_12 0xcccccccc /**< TweGIFT-128 tweak value 12 */ +#define GIFT128T_TWEAK_13 0x2d2d2d2d /**< TweGIFT-128 tweak value 13 */ +#define GIFT128T_TWEAK_14 0x1e1e1e1e /**< TweGIFT-128 tweak value 14 */ +#define GIFT128T_TWEAK_15 0xffffffff /**< TweGIFT-128 tweak value 15 */ + /** * \brief Encrypts a 128-bit block with TweGIFT-128 (tweakable variant). * * \param ks Points to the GIFT-128 key schedule. * \param output Output buffer which must be at least 16 bytes in length. * \param input Input buffer which must be at least 16 bytes in length. - * \param tweak 4-bit tweak value. + * \param tweak 4-bit tweak value expanded to 32-bit. * * The \a input and \a output buffers can be the same buffer for * in-place encryption. @@ -200,7 +217,7 @@ void gift128n_decrypt */ void gift128t_encrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input, unsigned char tweak); + const unsigned char *input, uint32_t tweak); /** * \brief Decrypts a 128-bit block with TweGIFT-128 (tweakable variant). @@ -208,7 +225,7 @@ void gift128t_encrypt * \param ks Points to the GIFT-128 key schedule. * \param output Output buffer which must be at least 16 bytes in length. * \param input Input buffer which must be at least 16 bytes in length. - * \param tweak 4-bit tweak value. + * \param tweak 4-bit tweak value expanded to 32-bit. * * The \a input and \a output buffers can be the same buffer for * in-place encryption. @@ -220,7 +237,7 @@ void gift128t_encrypt */ void gift128t_decrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input, unsigned char tweak); + const unsigned char *input, uint32_t tweak); #ifdef __cplusplus } diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-util.h b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-util.h index e79158c..e30166d 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-util.h +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-util.h @@ -238,6 +238,17 @@ } \ } while (0) +/* Rotation functions need to be optimised for best performance on AVR. + * The most efficient rotations are where the number of bits is 1 or a + * multiple of 8, so we compose the efficient rotations to produce all + * other rotation counts of interest. 
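+ *
+ * For example, a rotation left by 5 is composed from one byte-aligned
+ * rotation and three single-bit rotations:
+ *
+ *     leftRotate5(x) == rightRotate(rightRotate(rightRotate(
+ *                           leftRotate((x), 8), 1), 1), 1)
+ *
+ * On AVR the rotation by 8 is just a register byte move and each
+ * single-bit rotation is a short carry chain, so this is much cheaper
+ * than a generic 5-bit shift-and-or rotation.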
*/ + +#if defined(__AVR__) +#define LW_CRYPTO_ROTATE32_COMPOSED 1 +#else +#define LW_CRYPTO_ROTATE32_COMPOSED 0 +#endif + /* Rotation macros for 32-bit arguments */ /* Generic left rotate */ @@ -254,6 +265,8 @@ (_temp >> (bits)) | (_temp << (32 - (bits))); \ })) +#if !LW_CRYPTO_ROTATE32_COMPOSED + /* Left rotate by a specific number of bits. These macros may be replaced * with more efficient ones on platforms that lack a barrel shifter */ #define leftRotate1(a) (leftRotate((a), 1)) @@ -322,6 +335,138 @@ #define rightRotate30(a) (rightRotate((a), 30)) #define rightRotate31(a) (rightRotate((a), 31)) +#else /* LW_CRYPTO_ROTATE32_COMPOSED */ + +/* Composed rotation macros where 1 and 8 are fast, but others are slow */ + +/* Left rotate by 1 */ +#define leftRotate1(a) (leftRotate((a), 1)) + +/* Left rotate by 2 */ +#define leftRotate2(a) (leftRotate(leftRotate((a), 1), 1)) + +/* Left rotate by 3 */ +#define leftRotate3(a) (leftRotate(leftRotate(leftRotate((a), 1), 1), 1)) + +/* Left rotate by 4 */ +#define leftRotate4(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 1), 1), 1), 1)) + +/* Left rotate by 5: Rotate left by 8, then right by 3 */ +#define leftRotate5(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 8), 1), 1), 1)) + +/* Left rotate by 6: Rotate left by 8, then right by 2 */ +#define leftRotate6(a) (rightRotate(rightRotate(leftRotate((a), 8), 1), 1)) + +/* Left rotate by 7: Rotate left by 8, then right by 1 */ +#define leftRotate7(a) (rightRotate(leftRotate((a), 8), 1)) + +/* Left rotate by 8 */ +#define leftRotate8(a) (leftRotate((a), 8)) + +/* Left rotate by 9: Rotate left by 8, then left by 1 */ +#define leftRotate9(a) (leftRotate(leftRotate((a), 8), 1)) + +/* Left rotate by 10: Rotate left by 8, then left by 2 */ +#define leftRotate10(a) (leftRotate(leftRotate(leftRotate((a), 8), 1), 1)) + +/* Left rotate by 11: Rotate left by 8, then left by 3 */ +#define leftRotate11(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 8), 1), 1), 1)) + +/* Left rotate by 12: Rotate left by 16, then right by 4 */ +#define leftRotate12(a) (rightRotate(rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +/* Left rotate by 13: Rotate left by 16, then right by 3 */ +#define leftRotate13(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1)) + +/* Left rotate by 14: Rotate left by 16, then right by 2 */ +#define leftRotate14(a) (rightRotate(rightRotate(leftRotate((a), 16), 1), 1)) + +/* Left rotate by 15: Rotate left by 16, then right by 1 */ +#define leftRotate15(a) (rightRotate(leftRotate((a), 16), 1)) + +/* Left rotate by 16 */ +#define leftRotate16(a) (leftRotate((a), 16)) + +/* Left rotate by 17: Rotate left by 16, then left by 1 */ +#define leftRotate17(a) (leftRotate(leftRotate((a), 16), 1)) + +/* Left rotate by 18: Rotate left by 16, then left by 2 */ +#define leftRotate18(a) (leftRotate(leftRotate(leftRotate((a), 16), 1), 1)) + +/* Left rotate by 19: Rotate left by 16, then left by 3 */ +#define leftRotate19(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1)) + +/* Left rotate by 20: Rotate left by 16, then left by 4 */ +#define leftRotate20(a) (leftRotate(leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +/* Left rotate by 21: Rotate left by 24, then right by 3 */ +#define leftRotate21(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 24), 1), 1), 1)) + +/* Left rotate by 22: Rotate left by 24, then right by 2 */ +#define leftRotate22(a) (rightRotate(rightRotate(leftRotate((a), 24), 1), 1)) + 
+/* Left rotate by 23: Rotate left by 24, then right by 1 */ +#define leftRotate23(a) (rightRotate(leftRotate((a), 24), 1)) + +/* Left rotate by 24 */ +#define leftRotate24(a) (leftRotate((a), 24)) + +/* Left rotate by 25: Rotate left by 24, then left by 1 */ +#define leftRotate25(a) (leftRotate(leftRotate((a), 24), 1)) + +/* Left rotate by 26: Rotate left by 24, then left by 2 */ +#define leftRotate26(a) (leftRotate(leftRotate(leftRotate((a), 24), 1), 1)) + +/* Left rotate by 27: Rotate left by 24, then left by 3 */ +#define leftRotate27(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 24), 1), 1), 1)) + +/* Left rotate by 28: Rotate right by 4 */ +#define leftRotate28(a) (rightRotate(rightRotate(rightRotate(rightRotate((a), 1), 1), 1), 1)) + +/* Left rotate by 29: Rotate right by 3 */ +#define leftRotate29(a) (rightRotate(rightRotate(rightRotate((a), 1), 1), 1)) + +/* Left rotate by 30: Rotate right by 2 */ +#define leftRotate30(a) (rightRotate(rightRotate((a), 1), 1)) + +/* Left rotate by 31: Rotate right by 1 */ +#define leftRotate31(a) (rightRotate((a), 1)) + +/* Define the 32-bit right rotations in terms of left rotations */ +#define rightRotate1(a) (leftRotate31((a))) +#define rightRotate2(a) (leftRotate30((a))) +#define rightRotate3(a) (leftRotate29((a))) +#define rightRotate4(a) (leftRotate28((a))) +#define rightRotate5(a) (leftRotate27((a))) +#define rightRotate6(a) (leftRotate26((a))) +#define rightRotate7(a) (leftRotate25((a))) +#define rightRotate8(a) (leftRotate24((a))) +#define rightRotate9(a) (leftRotate23((a))) +#define rightRotate10(a) (leftRotate22((a))) +#define rightRotate11(a) (leftRotate21((a))) +#define rightRotate12(a) (leftRotate20((a))) +#define rightRotate13(a) (leftRotate19((a))) +#define rightRotate14(a) (leftRotate18((a))) +#define rightRotate15(a) (leftRotate17((a))) +#define rightRotate16(a) (leftRotate16((a))) +#define rightRotate17(a) (leftRotate15((a))) +#define rightRotate18(a) (leftRotate14((a))) +#define rightRotate19(a) (leftRotate13((a))) +#define rightRotate20(a) (leftRotate12((a))) +#define rightRotate21(a) (leftRotate11((a))) +#define rightRotate22(a) (leftRotate10((a))) +#define rightRotate23(a) (leftRotate9((a))) +#define rightRotate24(a) (leftRotate8((a))) +#define rightRotate25(a) (leftRotate7((a))) +#define rightRotate26(a) (leftRotate6((a))) +#define rightRotate27(a) (leftRotate5((a))) +#define rightRotate28(a) (leftRotate4((a))) +#define rightRotate29(a) (leftRotate3((a))) +#define rightRotate30(a) (leftRotate2((a))) +#define rightRotate31(a) (leftRotate1((a))) + +#endif /* LW_CRYPTO_ROTATE32_COMPOSED */ + /* Rotation macros for 64-bit arguments */ /* Generic left rotate */ diff --git a/hyena/Implementations/crypto_aead/hyenav2/rhys/encrypt.c b/hyena/Implementations/crypto_aead/hyenav2/rhys/encrypt.c index db50784..188335e 100644 --- a/hyena/Implementations/crypto_aead/hyenav2/rhys/encrypt.c +++ b/hyena/Implementations/crypto_aead/hyenav2/rhys/encrypt.c @@ -9,7 +9,7 @@ int crypto_aead_encrypt const unsigned char *npub, const unsigned char *k) { - return hyena_aead_encrypt + return hyena_v2_aead_encrypt (c, clen, m, mlen, ad, adlen, nsec, npub, k); } @@ -21,6 +21,6 @@ int crypto_aead_decrypt const unsigned char *npub, const unsigned char *k) { - return hyena_aead_decrypt + return hyena_v2_aead_decrypt (m, mlen, nsec, c, clen, ad, adlen, npub, k); } diff --git a/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.c b/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.c index db5ba2b..eaafb36 100644 --- 
a/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.c +++ b/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.c @@ -25,14 +25,24 @@ #include "internal-util.h" #include -aead_cipher_t const hyena_cipher = { - "HYENA", +aead_cipher_t const hyena_v1_cipher = { + "HYENA-v1", HYENA_KEY_SIZE, HYENA_NONCE_SIZE, HYENA_TAG_SIZE, AEAD_FLAG_LITTLE_ENDIAN, - hyena_aead_encrypt, - hyena_aead_decrypt + hyena_v1_aead_encrypt, + hyena_v1_aead_decrypt +}; + +aead_cipher_t const hyena_v2_cipher = { + "HYENA-v2", + HYENA_KEY_SIZE, + HYENA_NONCE_SIZE, + HYENA_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + hyena_v2_aead_encrypt, + hyena_v2_aead_decrypt }; /** @@ -69,7 +79,236 @@ static void hyena_triple_delta(unsigned char D[8]) } /** - * \brief Process the associated data for HYENA. + * \brief Process the associated data for HYENA-v1. + * + * \param ks Key schedule for the GIFT-128 cipher. + * \param Y Internal hash state of HYENA. + * \param D Internal hash state of HYENA. + * \param ad Points to the associated data. + * \param adlen Length of the associated data in bytes. + */ +static void hyena_v1_process_ad + (const gift128n_key_schedule_t *ks, unsigned char Y[16], + unsigned char D[8], const unsigned char *ad, + unsigned long long adlen) +{ + unsigned char feedback[16]; + hyena_double_delta(D); + while (adlen > 16) { + memcpy(feedback, ad, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + gift128n_encrypt(ks, Y, Y); + hyena_double_delta(D); + ad += 16; + adlen -= 16; + } + if (adlen == 16) { + hyena_double_delta(D); + memcpy(feedback, ad, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + } else { + unsigned temp = (unsigned)adlen; + hyena_double_delta(D); + hyena_double_delta(D); + memcpy(feedback, ad, temp); + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + if (temp > 8) + lw_xor_block(feedback + 8, Y + 8, temp - 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + } +} + +int hyena_v1_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + gift128n_key_schedule_t ks; + unsigned char Y[16]; + unsigned char D[8]; + unsigned char feedback[16]; + unsigned index; + (void)nsec; + + /* Set the length of the returned ciphertext */ + *clen = mlen + HYENA_TAG_SIZE; + + /* Set up the key schedule and use it to encrypt the nonce */ + gift128n_init(&ks, k); + Y[0] = 0; + if (adlen == 0) + Y[0] |= 0x01; + if (adlen == 0 && mlen == 0) + Y[0] |= 0x02; + Y[1] = 0; + Y[2] = 0; + Y[3] = 0; + memcpy(Y + 4, npub, HYENA_NONCE_SIZE); + gift128n_encrypt(&ks, Y, Y); + memcpy(D, Y + 8, 8); + + /* Process the associated data */ + hyena_v1_process_ad(&ks, Y, D, ad, adlen); + + /* Encrypt the plaintext to produce the ciphertext */ + if (mlen > 0) { + while (mlen > 16) { + gift128n_encrypt(&ks, Y, Y); + hyena_double_delta(D); + memcpy(feedback, m, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, 16); + lw_xor_block(Y, feedback, 16); + c += 16; + m += 16; + mlen -= 16; + } + gift128n_encrypt(&ks, Y, Y); + if (mlen == 16) { + hyena_double_delta(D); + hyena_double_delta(D); + memcpy(feedback, m, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, 16); 
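+            /* fold the feedback block (M_left || M_right ^ Y_right ^ D)
+             * back into the chaining state before the next block encryption */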
+ lw_xor_block(Y, feedback, 16); + c += 16; + } else { + unsigned temp = (unsigned)mlen; + hyena_double_delta(D); + hyena_double_delta(D); + hyena_double_delta(D); + memcpy(feedback, m, temp); + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + if (temp > 8) + lw_xor_block(feedback + 8, Y + 8, temp - 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, temp); + lw_xor_block(Y, feedback, 16); + c += temp; + } + } + + /* Swap the two halves of Y and generate the authentication tag */ + for (index = 0; index < 8; ++index) { + unsigned char temp1 = Y[index]; + unsigned char temp2 = Y[index + 8]; + Y[index] = temp2; + Y[index + 8] = temp1; + } + gift128n_encrypt(&ks, c, Y); + return 0; +} + +int hyena_v1_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + gift128n_key_schedule_t ks; + unsigned char Y[16]; + unsigned char D[8]; + unsigned char feedback[16]; + unsigned char *mtemp; + unsigned index; + (void)nsec; + + /* Validate the ciphertext length and set the return "mlen" value */ + if (clen < HYENA_TAG_SIZE) + return -1; + *mlen = clen - HYENA_TAG_SIZE; + + /* Set up the key schedule and use it to encrypt the nonce */ + gift128n_init(&ks, k); + Y[0] = 0; + if (adlen == 0) + Y[0] |= 0x01; + if (adlen == 0 && clen == HYENA_TAG_SIZE) + Y[0] |= 0x02; + Y[1] = 0; + Y[2] = 0; + Y[3] = 0; + memcpy(Y + 4, npub, HYENA_NONCE_SIZE); + gift128n_encrypt(&ks, Y, Y); + memcpy(D, Y + 8, 8); + + /* Process the associated data */ + hyena_v1_process_ad(&ks, Y, D, ad, adlen); + + /* Decrypt the ciphertext to produce the plaintext */ + clen -= HYENA_TAG_SIZE; + mtemp = m; + if (clen > 0) { + while (clen > 16) { + gift128n_encrypt(&ks, Y, Y); + hyena_double_delta(D); + memcpy(feedback + 8, c + 8, 8); + lw_xor_block_2_src(m, c, Y, 16); + memcpy(feedback, m, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += 16; + m += 16; + clen -= 16; + } + gift128n_encrypt(&ks, Y, Y); + if (clen == 16) { + hyena_double_delta(D); + hyena_double_delta(D); + memcpy(feedback + 8, c + 8, 8); + lw_xor_block_2_src(m, c, Y, 16); + memcpy(feedback, m, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += 16; + } else { + unsigned temp = (unsigned)clen; + hyena_double_delta(D); + hyena_double_delta(D); + hyena_double_delta(D); + if (temp > 8) { + memcpy(feedback + 8, c + 8, temp - 8); + lw_xor_block_2_src(m, c, Y, temp); + memcpy(feedback, m, 8); + } else { + lw_xor_block_2_src(m, c, Y, temp); + memcpy(feedback, m, temp); + } + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += temp; + } + } + + /* Swap the two halves of Y and check the authentication tag */ + for (index = 0; index < 8; ++index) { + unsigned char temp1 = Y[index]; + unsigned char temp2 = Y[index + 8]; + Y[index] = temp2; + Y[index + 8] = temp1; + } + gift128n_encrypt(&ks, Y, Y); + return aead_check_tag(mtemp, *mlen, Y, c, HYENA_TAG_SIZE); +} + +/** + * \brief Process the associated data for HYENA-v2. * * \param ks Key schedule for the GIFT-128 cipher. * \param Y Internal hash state of HYENA. @@ -77,7 +316,7 @@ static void hyena_triple_delta(unsigned char D[8]) * \param ad Points to the associated data. * \param adlen Length of the associated data in bytes. 
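+ *
+ * Each 16-byte block is absorbed with the same feedback function as in
+ * hyena_v1_process_ad() above: the upper half of the block is masked
+ * with the upper half of Y and the current delta D before being folded
+ * into Y, which is then re-encrypted. The two versions differ
+ * essentially in how the delta D is advanced around the final block.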
 */
-static void hyena_process_ad
+static void hyena_v2_process_ad
    (const gift128n_key_schedule_t *ks, unsigned char Y[16],
     unsigned char D[8], const unsigned char *ad,
     unsigned long long adlen)
@@ -113,7 +352,7 @@ static void hyena_process_ad
    }
}

-int hyena_aead_encrypt
+int hyena_v2_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
@@ -146,7 +385,7 @@ int hyena_aead_encrypt
    memcpy(D, Y + 8, 8);

    /* Process the associated data */
-    hyena_process_ad(&ks, Y, D, ad, adlen);
+    hyena_v2_process_ad(&ks, Y, D, ad, adlen);

    /* Encrypt the plaintext to produce the ciphertext */
    if (mlen > 0) {
@@ -198,7 +437,7 @@ int hyena_aead_encrypt
    return 0;
}

-int hyena_aead_decrypt
+int hyena_v2_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
@@ -234,7 +473,7 @@ int hyena_aead_decrypt
    memcpy(D, Y + 8, 8);

    /* Process the associated data */
-    hyena_process_ad(&ks, Y, D, ad, adlen);
+    hyena_v2_process_ad(&ks, Y, D, ad, adlen);

    /* Decrypt the ciphertext to produce the plaintext */
    clen -= HYENA_TAG_SIZE;
diff --git a/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.h b/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.h
index ee9bb9c..79585d5 100644
--- a/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.h
+++ b/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.h
@@ -33,6 +33,12 @@
 * GIFT-128 block cipher. The algorithm has a 128-bit key, a 96-bit nonce,
 * and a 128-bit authentication tag.
 *
+ * This library implements both the v1 and v2 versions of HYENA from the
+ * authors. The v1 version was submitted to the second round of the
+ * NIST Lightweight Cryptography Competition but was later found to be
+ * vulnerable to a forgery attack. The authors fixed the flaw in v2,
+ * but too late for the update to be submitted for the second round.
+ *
 * References: https://www.isical.ac.in/~lightweight/hyena/
 */

@@ -56,12 +62,76 @@ extern "C" {
#define HYENA_NONCE_SIZE 12

/**
- * \brief Meta-information block for the HYENA cipher.
+ * \brief Meta-information block for the HYENA-v1 cipher.
+ */
+extern aead_cipher_t const hyena_v1_cipher;
+
+/**
+ * \brief Meta-information block for the HYENA-v2 cipher.
+ */
+extern aead_cipher_t const hyena_v2_cipher;
+
+/**
+ * \brief Encrypts and authenticates a packet with HYENA-v1.
+ *
+ * \param c Buffer to receive the output.
+ * \param clen On exit, set to the length of the output which includes
+ * the ciphertext and the 16 byte authentication tag.
+ * \param m Buffer that contains the plaintext message to encrypt.
+ * \param mlen Length of the plaintext message in bytes.
+ * \param ad Buffer that contains associated data to authenticate
+ * along with the packet but which does not need to be encrypted.
+ * \param adlen Length of the associated data in bytes.
+ * \param nsec Secret nonce - not used by this algorithm.
+ * \param npub Points to the public nonce for the packet which must
+ * be 12 bytes in length.
+ * \param k Points to the 16 bytes of the key to use to encrypt the packet.
+ *
+ * \return 0 on success, or a negative value if there was an error in
+ * the parameters.
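+ *
+ * A minimal usage sketch (MLEN and ADLEN are caller-chosen sizes,
+ * shown here for illustration only):
+ *
+ *     unsigned char c[MLEN + HYENA_TAG_SIZE];
+ *     unsigned long long clen;
+ *     hyena_v1_aead_encrypt(c, &clen, m, MLEN, ad, ADLEN, 0, npub, k);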
+ *
+ * \sa hyena_v1_aead_decrypt()
 */
-extern aead_cipher_t const hyena_cipher;
+int hyena_v1_aead_encrypt
+    (unsigned char *c, unsigned long long *clen,
+     const unsigned char *m, unsigned long long mlen,
+     const unsigned char *ad, unsigned long long adlen,
+     const unsigned char *nsec,
+     const unsigned char *npub,
+     const unsigned char *k);
+
+/**
+ * \brief Decrypts and authenticates a packet with HYENA-v1.
+ *
+ * \param m Buffer to receive the plaintext message on output.
+ * \param mlen Receives the length of the plaintext message on output.
+ * \param nsec Secret nonce - not used by this algorithm.
+ * \param c Buffer that contains the ciphertext and authentication
+ * tag to decrypt.
+ * \param clen Length of the input data in bytes, which includes the
+ * ciphertext and the 16 byte authentication tag.
+ * \param ad Buffer that contains associated data to authenticate
+ * along with the packet but which does not need to be encrypted.
+ * \param adlen Length of the associated data in bytes.
+ * \param npub Points to the public nonce for the packet which must
+ * be 12 bytes in length.
+ * \param k Points to the 16 bytes of the key to use to decrypt the packet.
+ *
+ * \return 0 on success, -1 if the authentication tag was incorrect,
+ * or some other negative number if there was an error in the parameters.
+ *
+ * \sa hyena_v1_aead_encrypt()
+ */
+int hyena_v1_aead_decrypt
+    (unsigned char *m, unsigned long long *mlen,
+     unsigned char *nsec,
+     const unsigned char *c, unsigned long long clen,
+     const unsigned char *ad, unsigned long long adlen,
+     const unsigned char *npub,
+     const unsigned char *k);

/**
- * \brief Encrypts and authenticates a packet with HYENA.
+ * \brief Encrypts and authenticates a packet with HYENA-v2.
 *
 * \param c Buffer to receive the output.
 * \param clen On exit, set to the length of the output which includes
@@ -81,7 +151,7 @@ extern aead_cipher_t const hyena_cipher;
 *
 * \sa hyena_aead_decrypt()
 */
-int hyena_aead_encrypt
+int hyena_v2_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
@@ -90,7 +160,7 @@ int hyena_aead_encrypt
    const unsigned char *k);

/**
- * \brief Decrypts and authenticates a packet with HYENA.
+ * \brief Decrypts and authenticates a packet with HYENA-v2.
 *
 * \param m Buffer to receive the plaintext message on output.
 * \param mlen Receives the length of the plaintext message on output.
@@ -111,7 +181,7 @@ int hyena_aead_encrypt
 *
 * \sa hyena_aead_encrypt()
 */
-int hyena_aead_decrypt
+int hyena_v2_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/api.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/api.h
new file mode 100644
index 0000000..732ae75
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/api.h
@@ -0,0 +1,8 @@
+
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 16
+#define CRYPTO_ABYTES 16
+#define CRYPTO_NOOVERLAP 1
+
+
diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/auxFormat.h
new file mode 100644
index 0000000..f0ccdbd
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/auxFormat.h
@@ -0,0 +1,67 @@
+#include"crypto_aead.h"
+#include"api.h"
+#include <string.h>
+#define U32BIG(x) (x)
+
+#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
+#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
+
+#define sbox(a, b, c, d, f, g, h) \
+{ \
+    t1 = ~a; t2 = b & t1; t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; \
+    g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
+}
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+#define packFormat(out, in) {\
+    t1 = U32BIG(((u32*)in)[0]); \
+    t2 = U32BIG(((u32*)in)[1]); \
+    t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
+    t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
+    t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
+    t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
+    t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
+    t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
+    t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
+    t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
+    out[0] = (t2 & 0xFFFF0000) | (t1 >> 16); \
+    out[1] = (t2 << 16) | (t1 & 0x0000FFFF); \
+}
+#define unpackFormat(out, in) {\
+    t2 = (in[0] & 0xFFFF0000) | (in[1] >> 16); \
+    t1 = (in[1] & 0x0000FFFF) | (in[0] << 16); \
+    t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
+    t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
+    t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
+    t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
+    t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
+    t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
+    t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
+    t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
+    *((u64*)out) = ((u64)t2 << 32 | t1); \
+}
+#define getU32Format(out, in) {\
+    t2 = U32BIG(((u32*)in)[0]); \
+    t1 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t1 ^ (t1 << 1); \
+    t1 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t1 ^ (t1 << 2); \
+    t1 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t1 ^ (t1 << 4); \
+    t1 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t1 ^ (t1 << 8); \
+    *out = t2; \
+}
+#define ROUND256(constant6Format, lunNum) {\
+    s[0] ^= constant6Format[lunNum] >> 4;\
+    s[1] ^= constant6Format[lunNum] & 0x0f;\
+    sbox(s[0], s[2], s[4], s[6], s_temp[2], s_temp[4], s_temp[6]);\
+    sbox(s[1], s[3], s[5], s[7], s[2], s_temp[5], s_temp[7]);\
+    s[3] = LOTR32(s_temp[2], 1);\
+    s[4] = LOTR32(s_temp[4], 4);\
+    s[5] = LOTR32(s_temp[5], 4);\
+    s[6] = LOTR32(s_temp[7], 12);\
+    s[7] = LOTR32(s_temp[6], 13);\
+}
+
+
diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/crypto_aead.h
a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/encrypt.c new file mode 100644 index 0000000..30c6c80 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/encrypt.c @@ -0,0 +1,245 @@ + +#include"auxFormat.h" + +#define RATE (64 / 8) + +#define PR0_ROUNDS 52 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 +unsigned char constant6Format[63] = { + /*constant6_aead_128v1:*/ +0x1, +0x10, +0x2, +0x20, +0x4, +0x41, +0x11, +0x12, +0x22, +0x24, +0x45, +0x50, +0x3, +0x30, +0x6, +0x61, +0x15, +0x53, +0x33, +0x36, +0x67, +0x74, +0x46, +0x60, +0x5, +0x51, +0x13, +0x32, +0x26, +0x65, +0x54, +0x42, +0x21, +0x14, +0x43, +0x31, +0x16, +0x63, +0x35, +0x57, +0x72, +0x27, +0x75, +0x56, +0x62, +0x25, +0x55, +0x52, +0x23, +0x34, +0x47, +0x70, +0x7, +0x71, +0x17, +0x73, +0x37, +0x77, +0x76, +0x66, +0x64, +0x44, +0x40, + +}; + + + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + unsigned int i, j; + u32 s[8] = { 0 }; + u32 dataFormat[2] = { 0 }; + u8 tempData[8]; + u32 s_temp[8] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + *clen = mlen + CRYPTO_ABYTES; + //initialization + packFormat(s, npub); + packFormat((s + 2), (npub + 8)); + packFormat((s + 4), k); + packFormat((s + 6), (k + 8)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND256(constant6Format,i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + packFormat(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); +memcpy(tempData, ad, adlen * sizeof(unsigned char)); +tempData[adlen] = 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + } + s[6] ^= 0x80000000; + if (mlen) { + while (mlen >= RATE) { + packFormat(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + unpackFormat(c, s); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); +memcpy(tempData, m, mlen * sizeof(unsigned char)); + +tempData[mlen]= 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + unpackFormat(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c +=mlen; + } + // finalization + for (i = 0; i < 
PRF_ROUNDS; i++) {
+ ROUND256(constant6Format, i);
+ }
+ // return tag
+ unpackFormat(tempData, s);
+ memcpy(c, tempData, sizeof(tempData));
+ unpackFormat(tempData, (s + 2));
+ memcpy(c + 8, tempData, sizeof(tempData));
+ return 0;
+}
+
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+ unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *npub, const unsigned char *k) {
+ u8 i;
+ // 256-bit state: 256/32 = 8 words
+ u32 s[8] = { 0 };
+ u32 dataFormat[2] = { 0 };
+ u32 dataFormat_1[2] = { 0 };
+ u8 tempU8[32] = { 0 };
+ u8 tempData[8];
+ u32 s_temp[8] = { 0 };
+ u32 t1, t2, t3, t5, t6, t8, t9, t11;
+ if (clen < CRYPTO_ABYTES)
+ return -1;
+ *mlen = clen - CRYPTO_ABYTES;
+ // initialization
+ packFormat(s, npub);
+ packFormat((s + 2), (npub + 8));
+ packFormat((s + 4), k);
+ packFormat((s + 6), (k + 8));
+ for (i = 0; i < PR0_ROUNDS; i++) {
+ ROUND256(constant6Format, i);
+ }
+ // process associated data
+ if (adlen) {
+ while (adlen >= RATE) {
+ packFormat(dataFormat, ad);
+ s[0] ^= dataFormat[0];
+ s[1] ^= dataFormat[1];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND256(constant6Format, i);
+ }
+ adlen -= RATE;
+ ad += RATE;
+ }
+ memset(tempData, 0, sizeof(tempData));
+ memcpy(tempData, ad, adlen * sizeof(unsigned char));
+ tempData[adlen] = 0x01;
+ packFormat(dataFormat, tempData);
+ s[0] ^= dataFormat[0];
+ s[1] ^= dataFormat[1];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND256(constant6Format, i);
+ }
+ }
+ s[6] ^= 0x80000000;
+ // process ciphertext
+ clen -= CRYPTO_ABYTES; // strip the tag; only the body is decrypted
+ if (clen) {
+ while (clen >= RATE) {
+ packFormat(dataFormat, c);
+ dataFormat_1[0] = s[0] ^ dataFormat[0];
+ dataFormat_1[1] = s[1] ^ dataFormat[1];
+ unpackFormat(m, dataFormat_1);
+ s[0] = dataFormat[0];
+ s[1] = dataFormat[1];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND256(constant6Format, i);
+ }
+ clen -= RATE;
+ m += RATE;
+ c += RATE;
+ }
+ unpackFormat(tempU8, s);
+ for (i = 0; i < clen; ++i, ++m, ++c) {
+ *m = tempU8[i] ^ *c;
+ tempU8[i] = *c;
+ }
+ tempU8[i] ^= 0x01;
+ packFormat(s, tempU8);
+ }
+ // finalization
+ for (i = 0; i < PRF_ROUNDS; i++) {
+ ROUND256(constant6Format, i);
+ }
+ // verify tag (note: memcmp is not constant time)
+ unpackFormat(tempU8, s);
+ unpackFormat((tempU8 + 8), (s + 2));
+ if (memcmp((void*)tempU8, (void*)c, CRYPTO_ABYTES)) {
+ memset(m - *mlen, 0, (size_t)*mlen); // wipe any plaintext already written
+ *mlen = 0;
+ return -1;
+ }
+ return 0;
+}
diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/api.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/api.h new file mode 100644 index 0000000..d8257f4 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/api.h @@ -0,0 +1,7 @@
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 16
+#define CRYPTO_ABYTES 16
+#define CRYPTO_NOOVERLAP 1
+
+
diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h new file mode 100644 index 0000000..73bf963 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h @@ -0,0 +1,130 @@
+
+#include"crypto_aead.h"
+#include"api.h"
+#include<string.h>
+#include<stdio.h>
+#include<stdlib.h>
+#define U32BIG(x) (x)
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
+#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
+
+#define puckU32ToThree(x){\
+x &= 0x92492492;\
+x = (x | (x << 2)) & 0xc30c30c3;\
+x = (x | (x << 4)) & 0xf00f00f0;\ +x = (x | (x << 8)) & 0xff0000ff;\ +x = (x | (x << 16)) & 0xfff00000;\ +} +#define unpuckU32ToThree(x){\ +x &= 0xfff00000;\ +x = (x | (x >> 16)) & 0xff0000ff;\ +x = (x | (x >> 8)) & 0xf00f00f0;\ +x = (x | (x >> 4)) & 0xc30c30c3;\ +x = (x | (x >> 2)) & 0x92492492;\ +} +#define packU32FormatToThreePacket( out, in) {\ +t2 = U32BIG(((u32*)in)[0]); \ +t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t2 = t2 << 2; \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp2[0] >> 22); \ +out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} +#define packU96FormatToThreePacket(out, in) {\ +t9 = U32BIG(((u32*)in)[2]); \ +t1 = U32BIG(((u32*)in)[1]); \ +t2 = U32BIG(((u32*)in)[0]); \ +t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ +puckU32ToThree(temp0[0]); \ +puckU32ToThree(temp0[1]); \ +puckU32ToThree(temp0[2]); \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ +puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} +#define unpackU32FormatToThreePacket(out, in) {\ +temp2[0] = (in[0] & 0x000003ff) << 22; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +} +#define unpackU96FormatToThreePacket( out, in) {\ +temp0[0] = in[0] & 0xffe00000; \ +temp1[0] = (in[0] & 0x001ffc00) << 11; \ +temp2[0] = (in[0] & 0x000003ff) << 22; \ +temp0[1] = in[1] & 0xffe00000; \ +temp1[1] = (in[1] & 0x001ff800) << 11; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +temp0[2] = in[2] & 0xffc00000; \ +t1_32 = ((in[2] & 0x00200000) << 10); \ +temp1[2] = (in[2] & 0x001ff800) << 11; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp0[0]); \ +unpuckU32ToThree(temp0[1]); \ +unpuckU32ToThree(temp0[2]); \ +t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ +unpuckU32ToThree(temp1[0]); \ +unpuckU32ToThree(temp1[1]); \ +unpuckU32ToThree(temp1[2]); \ +t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +*(u32*)(out + 4) = U32BIG(t1); \ +*(u32*)(out + 8) = U32BIG(t9); \ +} + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define sbox(a, b, c, d, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define 
U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +/* +s0 s1 s2 +s3 s4 s5 +s6 s7 s8 +s9 s10 s11 +*/ + + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/crypto_aead.h new file mode 100644 index 0000000..cdfdf19 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/crypto_aead.h @@ -0,0 +1,17 @@ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c new file mode 100644 index 0000000..28abf55 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c @@ -0,0 +1,256 @@ + +#include"auxFormat.h" + + + +#define aead_RATE (192 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 + +unsigned char constant7Format[127] = { + /*constant7Format[127]:*/ + 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, + 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, + 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, + 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, + 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, + 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, + 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, + 0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, + 0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, + 0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, + 0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u8 i; + u32 s[12] = { 0 }; + u8 tempData[24] = { 0 }; + u32 dataFormat[6] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub+12, sizeof(unsigned char)*4); + memcpy(tempData+4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket((s + 
3), tempData); + packU96FormatToThreePacket((s + 6), (tempData+12)); + + s[9] = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat+3), (ad+12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[9] ^= 0x80000000; + if (mlen) { + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (m + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket((c+12), (s+3)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen]= 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket((tempData+12), (s+3)); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket(tempData, (s + 3)); + memcpy(c+12, tempData, sizeof(unsigned char) * 4); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + + u8 i, j; + u32 s[12] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 dataFormat[12] = { 0 }; + u32 dataFormat_1[12] = { 0 }; + u8 tempData[24] = { 0 }; + u8 tempU8[24] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket((s + 3), tempData); + packU96FormatToThreePacket((s + 6), (tempData + 12)); + + s[9] = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + 
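+ /*
+ * The 192-bit rate block is absorbed in two 96-bit halves: bytes 0..11
+ * were packed into dataFormat[0..2] and folded into s[0..2] above, and
+ * bytes 12..23 go into s[3..5] below.
+ */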
packU96FormatToThreePacket((dataFormat + 3), (ad + 12));
+ s[3] ^= dataFormat[3];
+ s[4] ^= dataFormat[4];
+ s[5] ^= dataFormat[5];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ adlen -= aead_RATE;
+ ad += aead_RATE;
+ }
+ memset(tempData, 0, sizeof(tempData));
+ memcpy(tempData, ad, adlen * sizeof(unsigned char));
+ tempData[adlen] = 0x01;
+ packU96FormatToThreePacket(dataFormat, tempData);
+ s[0] ^= dataFormat[0];
+ s[1] ^= dataFormat[1];
+ s[2] ^= dataFormat[2];
+ packU96FormatToThreePacket((dataFormat + 3), (tempData + 12));
+ s[3] ^= dataFormat[3];
+ s[4] ^= dataFormat[4];
+ s[5] ^= dataFormat[5];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ }
+ s[9] ^= 0x80000000;
+ // process ciphertext
+ clen -= CRYPTO_ABYTES;
+ if (clen) {
+ while (clen >= aead_RATE) {
+ packU96FormatToThreePacket(dataFormat, c);
+ dataFormat_1[0] = s[0] ^ dataFormat[0];
+ dataFormat_1[1] = s[1] ^ dataFormat[1];
+ dataFormat_1[2] = s[2] ^ dataFormat[2];
+ packU96FormatToThreePacket((dataFormat + 3), (c + 12));
+ dataFormat_1[3] = s[3] ^ dataFormat[3];
+ dataFormat_1[4] = s[4] ^ dataFormat[4];
+ dataFormat_1[5] = s[5] ^ dataFormat[5];
+ unpackU96FormatToThreePacket(m, dataFormat_1);
+ unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3));
+ s[0] = dataFormat[0];
+ s[1] = dataFormat[1];
+ s[2] = dataFormat[2];
+ s[3] = dataFormat[3];
+ s[4] = dataFormat[4];
+ s[5] = dataFormat[5];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ clen -= aead_RATE;
+ m += aead_RATE;
+ c += aead_RATE;
+ }
+ unpackU96FormatToThreePacket(tempU8, s);
+ unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
+ for (i = 0; i < clen; ++i, ++m, ++c) {
+ *m = tempU8[i] ^ *c;
+ tempU8[i] = *c;
+ }
+ tempU8[i] ^= 0x01;
+ packU96FormatToThreePacket(s, tempU8);
+ packU96FormatToThreePacket((s + 3), (tempU8 + 12));
+ }
+ // finalization
+ for (i = 0; i < PRF_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ // verify tag (note: memcmp is not constant time)
+ unpackU96FormatToThreePacket(tempU8, s);
+ unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
+ if (memcmp((void*)tempU8, (void*)c, CRYPTO_ABYTES)) {
+ memset(m - *mlen, 0, (size_t)*mlen); // wipe any plaintext already written
+ *mlen = 0;
+ return -1;
+ }
+ return 0;
+}
diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3/api.h new file mode 100644 index 0000000..c3cb1d9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/api.h @@ -0,0 +1,6 @@
+#define CRYPTO_KEYBYTES 24
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 24
+#define CRYPTO_ABYTES 24
+#define CRYPTO_NOOVERLAP 1
+
diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h new file mode 100644 index 0000000..f48ef1d --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h @@ -0,0 +1,121 @@
+
+#include"crypto_aead.h"
+#include"api.h"
+#include<string.h>
+#include<stdio.h>
+#include<stdlib.h>
+#define U32BIG(x) (x)
+#define U16BIG(x) (x)
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
+#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
+
+#define sbox(a, b, c, d, f, g, h) \
+{ \
+ t1 = ~a; t2 = b & t1; t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
+}
+
+#define puckU32ToThree(x){\
+x &= 0x92492492;\
+x = (x | (x << 2)) & 0xc30c30c3;\
+x = (x | (x << 4)) & 0xf00f00f0;\ +x = (x | (x << 8)) & 0xff0000ff;\ +x = (x | (x << 16)) & 0xfff00000;\ +} +#define unpuckU32ToThree(x){\ +x &= 0xfff00000;\ +x = (x | (x >> 16)) & 0xff0000ff;\ +x = (x | (x >> 8)) & 0xf00f00f0;\ +x = (x | (x >> 4)) & 0xc30c30c3;\ +x = (x | (x >> 2)) & 0x92492492;\ +} +#define packU48FormatToThreePacket( out, in) {\ +t1 = (u32)U16BIG(*(u16*)(in + 4)); \ +t2 = U32BIG(*(u32*)(in)); \ +t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ +puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} + + +#define packU96FormatToThreePacket(out, in) {\ +t9 = U32BIG(((u32*)in)[2]); \ +t1 = U32BIG(((u32*)in)[1]); \ +t2 = U32BIG(((u32*)in)[0]); \ +t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ +puckU32ToThree(temp0[0]); \ +puckU32ToThree(temp0[1]); \ +puckU32ToThree(temp0[2]); \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ +puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} +#define unpackU96FormatToThreePacket( out, in) {\ +temp0[0] = in[0] & 0xffe00000; \ +temp1[0] = (in[0] & 0x001ffc00) << 11; \ +temp2[0] = (in[0] & 0x000003ff) << 22; \ +temp0[1] = in[1] & 0xffe00000; \ +temp1[1] = (in[1] & 0x001ff800) << 11; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +temp0[2] = in[2] & 0xffc00000; \ +t1_32 = ((in[2] & 0x00200000) << 10); \ +temp1[2] = (in[2] & 0x001ff800) << 11; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp0[0]); \ +unpuckU32ToThree(temp0[1]); \ +unpuckU32ToThree(temp0[2]); \ +t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ +unpuckU32ToThree(temp1[0]); \ +unpuckU32ToThree(temp1[1]); \ +unpuckU32ToThree(temp1[2]); \ +t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +*(u32*)(out + 4) = U32BIG(t1); \ +*(u32*)(out + 8) = U32BIG(t9); \ +} +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot192/armcortexm_3/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c new file mode 100644 index 0000000..9513625 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c @@ -0,0 +1,210 @@ + +#include"auxFormat.h" + +#define aead_RATE (96 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 +unsigned char constant7Format[127] = { + /*constant7Format[127]:*/ +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, +0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, +0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, +0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, +0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, +0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, +0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\ +s[5] = LOTR32(s_temp[3], 1); \ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + + u8 i; + u32 s[12] = { 0 }; + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= 
dataFormat[0];
+ s[1] ^= dataFormat[1];
+ s[2] ^= dataFormat[2];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ }
+ s[9] ^= 0x80000000;
+ if (mlen) {
+ while (mlen >= aead_RATE) {
+ packU96FormatToThreePacket(dataFormat, m);
+ s[0] ^= dataFormat[0];
+ s[1] ^= dataFormat[1];
+ s[2] ^= dataFormat[2];
+ unpackU96FormatToThreePacket(c, s);
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ mlen -= aead_RATE;
+ m += aead_RATE;
+ c += aead_RATE;
+ }
+ memset(tempData, 0, sizeof(tempData));
+ memcpy(tempData, m, mlen);
+ tempData[mlen] = 0x01;
+ packU96FormatToThreePacket(dataFormat, tempData);
+ s[0] ^= dataFormat[0];
+ s[1] ^= dataFormat[1];
+ s[2] ^= dataFormat[2];
+ unpackU96FormatToThreePacket(tempData, s);
+ memcpy(c, tempData, mlen);
+ c += mlen;
+ }
+ // finalization
+ for (i = 0; i < PRF_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ // return tag
+ unpackU96FormatToThreePacket(c, s);
+ unpackU96FormatToThreePacket((c + 12), (s + 3));
+ return 0;
+}
+
+int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
+ unsigned char *nsec, const unsigned char *c, unsigned long long clen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *npub, const unsigned char *k) {
+ u8 i;
+ u32 s[12] = { 0 };
+ u32 dataFormat[6] = { 0 };
+ u32 dataFormat_1[3] = { 0 };
+ u8 tempData[12] = { 0 };
+ u8 tempU8[48] = { 0 };
+ u32 s_temp[12] = { 0 };
+ u32 t1, t2, t3, t5, t6, t8, t9, t11;
+ u32 t1_32, t2_64, t2_65;
+ u32 temp0[3] = { 0 };
+ u32 temp1[3] = { 0 };
+ u32 temp2[3] = { 0 };
+ if (clen < CRYPTO_ABYTES)
+ return -1;
+ *mlen = clen - CRYPTO_ABYTES;
+ // initialization
+ packU96FormatToThreePacket(s, npub);
+ packU96FormatToThreePacket((s + 3), (npub + 12));
+ packU96FormatToThreePacket((s + 6), k);
+ packU96FormatToThreePacket((s + 9), (k + 12));
+ for (i = 0; i < PR0_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ // process associated data
+ if (adlen) {
+ while (adlen >= aead_RATE) {
+ packU96FormatToThreePacket(dataFormat, ad);
+ s[0] ^= dataFormat[0];
+ s[1] ^= dataFormat[1];
+ s[2] ^= dataFormat[2];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ adlen -= aead_RATE;
+ ad += aead_RATE;
+ }
+ memset(tempData, 0, sizeof(tempData));
+ memcpy(tempData, ad, adlen);
+ tempData[adlen] = 0x01;
+ packU96FormatToThreePacket(dataFormat, tempData);
+ s[0] ^= dataFormat[0];
+ s[1] ^= dataFormat[1];
+ s[2] ^= dataFormat[2];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ }
+ s[9] ^= 0x80000000;
+ // process ciphertext
+ clen -= CRYPTO_ABYTES;
+ if (clen) {
+ while (clen >= aead_RATE) {
+ packU96FormatToThreePacket(dataFormat, c);
+ dataFormat_1[0] = s[0] ^ dataFormat[0];
+ dataFormat_1[1] = s[1] ^ dataFormat[1];
+ dataFormat_1[2] = s[2] ^ dataFormat[2];
+ unpackU96FormatToThreePacket(m, dataFormat_1);
+ s[0] = dataFormat[0];
+ s[1] = dataFormat[1];
+ s[2] = dataFormat[2];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ clen -= aead_RATE;
+ m += aead_RATE;
+ c += aead_RATE;
+ }
+ unpackU96FormatToThreePacket(tempU8, s);
+ for (i = 0; i < clen; ++i, ++m, ++c) {
+ *m = tempU8[i] ^ *c;
+ tempU8[i] = *c;
+ }
+ tempU8[i] ^= 0x01;
+ packU96FormatToThreePacket(s, tempU8);
+ }
+ // finalization
+ for (i = 0; i < PRF_ROUNDS; i++) {
+ ROUND384(i);
+ }
+ // verify tag (note: memcmp is not constant time)
+ unpackU96FormatToThreePacket(tempU8, s);
+ unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
+ if (memcmp((void*)tempU8, (void*)c, CRYPTO_ABYTES)) {
+ memset(m - *mlen, 0, (size_t)*mlen); // wipe any plaintext already written
+ *mlen = 0;
+ return -1;
+ }
+ return 0;
+}
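All four KNOT variants in this patch share one sponge skeleton: absorb full rate blocks, pad the tail with a single 0x01 byte, and permute between blocks (the routines above skip absorption entirely when the input is empty, so a zero-length input contributes no padding block). Stripped of the bit-interleaving, the associated-data path reduces to a sketch like the following; absorb and permute are placeholder names for this illustration, not symbols from the diff.

/* sketch.c - hypothetical byte-oriented view of the AD absorb/pad loop */
#include <stdio.h>
#include <string.h>

typedef unsigned long long u64;

/* Toy permutation stand-in; the real code runs PR_ROUNDS of ROUND384. */
static void permute(unsigned char *state) {
  state[0] ^= 0xff; /* placeholder only */
}

static void absorb(unsigned char *state, unsigned rate,
                   const unsigned char *in, u64 len) {
  unsigned i;
  while (len >= rate) {
    for (i = 0; i < rate; i++) state[i] ^= in[i]; /* full rate block */
    permute(state);
    in += rate;
    len -= rate;
  }
  for (i = 0; i < (unsigned)len; i++) state[i] ^= in[i]; /* partial tail */
  state[len] ^= 0x01; /* 10* padding byte, as in the code above */
  permute(state);
}

int main(void) {
  unsigned char state[48] = { 0 };
  const unsigned char ad[5] = { 1, 2, 3, 4, 5 };
  absorb(state, 12, ad, sizeof(ad)); /* 12-byte rate, as in knot192 */
  printf("state[0] = %02x\n", state[0]);
  return 0;
}

diff --git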
a/knot/Implementations/crypto_aead/knot256/armcortexm_3/api.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3/api.h new file mode 100644 index 0000000..396f722 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/api.h @@ -0,0 +1,7 @@
+#define CRYPTO_KEYBYTES 32 // 256/8 = 32
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 32
+#define CRYPTO_ABYTES 32
+#define CRYPTO_NOOVERLAP 1
+
+
diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h new file mode 100644 index 0000000..29e78bc --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h @@ -0,0 +1,106 @@
+
+#include"crypto_aead.h"
+#include"api.h"
+#include<string.h>
+#define U32BIG(x) (x)
+
+
+#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
+#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
+
+
+#define sbox(a, b, c, d, f, g, h) \
+{ \
+ t1 = ~a; t2 = b & t1; t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
+}
+
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+void printU8(char name[], u8 var[], long len, int offset); // debug helper
+
+// uses t9 as scratch
+#define puck32(in)\
+{\
+t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\
+t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\
+t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\
+t9 = (in ^ (in >> 8)) & 0x0000FF00; in ^= t9 ^ (t9 << 8);\
+}
+// uses t9 as scratch
+#define unpuck32(t0){\
+ t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \
+ t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \
+ t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \
+ t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \
+}
+// uses u32 scratch t1, t2, t3, t8, t9
+#define packU128FormatToFourPacket(out,in) {\
+ t8 = U32BIG(((u32*)in)[0]); \
+ t1 = U32BIG(((u32*)in)[1]); \
+ t2 = U32BIG(((u32*)in)[2]); \
+ t3 = U32BIG(((u32*)in)[3]); \
+ puck32(t8); puck32(t8); \
+ puck32(t1); puck32(t1); \
+ puck32(t2); puck32(t2); \
+ puck32(t3); puck32(t3); \
+ out[3] = (t3 & 0xff000000) | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \
+ out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \
+ out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \
+ out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \
+}
+// uses u32 scratch t1, t2, t3, t8, t9
+#define unpackU128FormatToFourPacket( out, dataFormat) {\
+t3 = (dataFormat[3] & 0xff000000) | ((dataFormat[2] >> 8) & 0x00ff0000) | ((dataFormat[1] >> 16) & 0x0000ff00) | (dataFormat[0] >> 24); \
+t2 = ((dataFormat[3] << 8) & 0xff000000) | (dataFormat[2] & 0x00ff0000) | ((dataFormat[1] >> 8) & 0x0000ff00) | ((dataFormat[0] >> 16) & 0x000000ff); \
+t1 = ((dataFormat[3] << 16) & 0xff000000) | ((dataFormat[2] << 8) & 0x00ff0000) | (dataFormat[1] & 0x0000ff00) | ((dataFormat[0] >> 8) & 0x000000ff); \
+t8 = ((dataFormat[3] << 24) & 0xff000000) | ((dataFormat[2] << 16) & 0x00ff0000) | ((dataFormat[1] << 8) & 0x0000ff00) | (dataFormat[0] & 0x000000ff); \
+unpuck32(t8); unpuck32(t8); \
+unpuck32(t1); unpuck32(t1); \
+unpuck32(t2); unpuck32(t2); \
+unpuck32(t3); unpuck32(t3); \
+((u32*)out)[0] = U32BIG(t8); \
+((u32*)out)[1] = U32BIG(t1); \
+((u32*)out)[2] = U32BIG(t2); \
+((u32*)out)[3] = U32BIG(t3); \
+}
+#define packU64FormatToFourPacket( out, in) {\
+t1 = 
U32BIG(((u32*)in)[0]); \ +t2 = U32BIG(((u32*)in)[1]); \ +puck32(t1); \ +puck32(t1); \ +puck32(t2); \ +puck32(t2); \ +out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); \ +out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); \ +out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); \ +out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); \ +} +#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t0, 4);\ +t5 = LOTR32(t1, 4);\ +t6 = LOTR32(t2, 4); \ +t7 = LOTR32(t3, 4); \ +} +#define BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t3, 7);\ +t5 = LOTR32(t0, 6);\ +t6 = LOTR32(t1, 6); \ +t7 = LOTR32(t2, 6); \ +} + +#define ROUND512( arr,lunNum) {\ +s[3] ^= (arr[lunNum] >> 6) & 0x3;\ +s[2] ^= (arr[lunNum] >> 4) & 0x3;\ +s[1] ^= (arr[lunNum] >> 2) & 0x3;\ +s[0] ^= arr[lunNum] & 0x3;\ +sbox(s[3], s[7], s[11], s[15], s_temp[7], s_temp[11], s_temp[15]);\ +sbox(s[2], s[6], s[10], s[14], s[7] , s_temp[10], s_temp[14]);\ +sbox(s[1], s[5], s[9], s[13], s[6] , s_temp[9], s_temp[13]);\ +sbox(s[0], s[4], s[8], s[12], s[5] , s_temp[8], s_temp[12]);\ +s[4]= LOTR32(s_temp[7], 1);\ +BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], s[10], s[11]);\ +BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], s[13], s[14], s[15]);\ +} + diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3/crypto_aead.h new file mode 100644 index 0000000..cdfdf19 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/crypto_aead.h @@ -0,0 +1,17 @@ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c new file mode 100644 index 0000000..d8d1476 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c @@ -0,0 +1,328 @@ + +#include"auxFormat.h" + +#define aead_RATE (128 / 8) +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 +unsigned char constant7Format_aead[127] = { + /*constant7_aead_256*/ +0x1, +0x4, +0x10, +0x40, +0x2, +0x8, +0x21, +0x5, +0x14, +0x50, +0x42, +0xa, +0x29, +0x24, +0x11, +0x44, +0x12, +0x48, +0x23, +0xd, +0x35, +0x55, +0x56, +0x5a, +0x6b, +0x2e, +0x38, +0x60, +0x3, +0xc, +0x31, +0x45, +0x16, +0x58, +0x63, +0xf, +0x3d, +0x74, +0x53, +0x4e, +0x3b, +0x6c, +0x32, +0x49, +0x27, +0x1d, +0x75, +0x57, +0x5e, +0x7b, +0x6e, +0x3a, +0x68, +0x22, +0x9, +0x25, +0x15, +0x54, +0x52, +0x4a, +0x2b, +0x2c, +0x30, +0x41, +0x6, +0x18, +0x61, +0x7, +0x1c, +0x71, +0x47, +0x1e, +0x79, +0x66, +0x1b, +0x6d, +0x36, +0x59, +0x67, +0x1f, +0x7d, +0x76, +0x5b, +0x6f, +0x3e, +0x78, +0x62, +0xb, +0x2d, +0x34, +0x51, +0x46, +0x1a, +0x69, +0x26, +0x19, +0x65, +0x17, +0x5c, +0x73, +0x4f, +0x3f, +0x7c, +0x72, +0x4b, +0x2f, +0x3c, +0x70, +0x43, +0xe, +0x39, +0x64, +0x13, +0x4c, +0x33, +0x4d, +0x37, +0x5d, +0x77, +0x5f, +0x7f, +0x7e, +0x7a, +0x6a, +0x2a, +0x28, +0x20, +}; + + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + 
const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +) { + u32 i ; + u32 s_temp[16] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 s[16] = { 0 }; + u32 dataFormat[4] = { 0 }; + u8 tempData[16] = {0}; + *clen = mlen + CRYPTO_ABYTES; + //initialization + packU128FormatToFourPacket(s, npub); + packU128FormatToFourPacket((s + 4), (npub + 16)); + packU128FormatToFourPacket((s + 8), k); + packU128FormatToFourPacket((s + 12), (k + 16)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND512(constant7Format_aead,i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + } + s[15] ^= 0x80000000; + if (mlen) { + while (mlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + unpackU128FormatToFourPacket(c, s); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen]= 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + unpackU128FormatToFourPacket(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + // return tag + unpackU128FormatToFourPacket(c, s); + unpackU128FormatToFourPacket((c+16), (s+4)); + return 0; +} + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +){ + u32 s_temp[16] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u8 i ; + // initialization + u32 s[16] = { 0 }; + u32 dataFormat_1[4] = { 0 }; + u32 dataFormat_2[4] = { 0 }; + u8 tempData[16] = { 0 }; + u8 tempU8[64] = { 0 }; + + if (clen < CRYPTO_ABYTES) + return -1; + *mlen = clen - CRYPTO_ABYTES; + //initialization + packU128FormatToFourPacket(s, npub); + packU128FormatToFourPacket((s + 4), (npub + 16)); + packU128FormatToFourPacket((s + 8), k); + packU128FormatToFourPacket((s + 12), (k + 16)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat_2, ad); + s[0] ^= dataFormat_2[0]; + s[1] ^= dataFormat_2[1]; + s[2] ^= dataFormat_2[2]; + s[3] ^= dataFormat_2[3]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, 
sizeof(tempData));
+
+ memcpy(tempData, ad, adlen * sizeof(unsigned char));
+ tempData[adlen] = 0x01;
+ packU128FormatToFourPacket(dataFormat_2, tempData);
+ s[0] ^= dataFormat_2[0];
+ s[1] ^= dataFormat_2[1];
+ s[2] ^= dataFormat_2[2];
+ s[3] ^= dataFormat_2[3];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND512(constant7Format_aead, i);
+ }
+ }
+ s[15] ^= 0x80000000;
+ // process ciphertext
+ clen -= CRYPTO_ABYTES; // strip the tag
+
+ if (clen) {
+ while (clen >= aead_RATE) {
+ packU128FormatToFourPacket(dataFormat_2, c);
+ dataFormat_1[0] = s[0] ^ dataFormat_2[0];
+ dataFormat_1[1] = s[1] ^ dataFormat_2[1];
+ dataFormat_1[2] = s[2] ^ dataFormat_2[2];
+ dataFormat_1[3] = s[3] ^ dataFormat_2[3];
+ unpackU128FormatToFourPacket(m, dataFormat_1);
+ s[0] = dataFormat_2[0];
+ s[1] = dataFormat_2[1];
+ s[2] = dataFormat_2[2];
+ s[3] = dataFormat_2[3];
+ for (i = 0; i < PR_ROUNDS; i++) {
+ ROUND512(constant7Format_aead, i);
+ }
+ clen -= aead_RATE;
+ m += aead_RATE;
+ c += aead_RATE;
+ }
+ unpackU128FormatToFourPacket(tempU8, s);
+ for (i = 0; i < clen; ++i, ++m, ++c) {
+ *m = tempU8[i] ^ *c;
+ tempU8[i] = *c;
+ }
+ tempU8[i] ^= 0x01;
+ packU128FormatToFourPacket(s, tempU8);
+ }
+ // finalization
+ for (i = 0; i < PRF_ROUNDS; i++) {
+ ROUND512(constant7Format_aead, i);
+ }
+ // verify tag (note: memcmp is not constant time)
+ unpackU128FormatToFourPacket(tempU8, s);
+ unpackU128FormatToFourPacket((tempU8 + 16), (s + 4));
+ if (memcmp((void*)tempU8, (void*)c, CRYPTO_ABYTES)) {
+ memset(m - *mlen, 0, (size_t)*mlen); // wipe any plaintext already written
+ *mlen = 0;
+ return -1;
+ }
+ return 0;
+}
\ No newline at end of file
diff --git a/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256-avr.S b/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256-avr.S new file mode 100644 index 0000000..5826dd3 --- /dev/null +++ b/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256-avr.S @@ -0,0 +1,2583 @@
+#if defined(__AVR__)
+#include <avr/io.h>
+/* Automatically generated - do not edit */
+
+ .text
+.global photon256_permute
+ .type photon256_permute, @function
+photon256_permute:
+ push r28
+ push r29
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ movw r30,r24
+ in r28,0x3d
+ in r29,0x3e
+ sbiw r28,32
+ in r0,0x3f
+ cli
+ out 0x3e,r29
+ out 0x3f,r0
+ out 0x3d,r28
+.L__stack_usage = 49
+ ldd r18,Z+16
+ ldd r19,Z+17
+ ldd r20,Z+18
+ ldd r21,Z+19
+ bst r18,0
+ bld r2,0
+ bst r18,1
+ bld r6,0
+ bst r18,2
+ bld r10,0
+ bst r18,3
+ bld r14,0
+ bst r18,4
+ bld r2,1
+ bst r18,5
+ bld r6,1
+ bst r18,6
+ bld r10,1
+ bst r18,7
+ bld r14,1
+ bst r19,0
+ bld r2,2
+ bst r19,1
+ bld r6,2
+ bst r19,2
+ bld r10,2
+ bst r19,3
+ bld r14,2
+ bst r19,4
+ bld r2,3
+ bst r19,5
+ bld r6,3
+ bst r19,6
+ bld r10,3
+ bst r19,7
+ bld r14,3
+ bst r20,0
+ bld r2,4
+ bst r20,1
+ bld r6,4
+ bst r20,2
+ bld r10,4
+ bst r20,3
+ bld r14,4
+ bst r20,4
+ bld r2,5
+ bst r20,5
+ bld r6,5
+ bst r20,6
+ bld r10,5
+ bst r20,7
+ bld r14,5
+ bst r21,0
+ bld r2,6
+ bst r21,1
+ bld r6,6
+ bst r21,2
+ bld r10,6
+ bst r21,3
+ bld r14,6
+ bst r21,4
+ bld r2,7
+ bst r21,5
+ bld r6,7
+ bst r21,6
+ bld r10,7
+ bst r21,7
+ bld r14,7
+ ldd r18,Z+20
+ ldd r19,Z+21
+ ldd r20,Z+22
+ ldd r21,Z+23
+ bst r18,0
+ bld r3,0
+ bst r18,1
+ bld r7,0
+ bst r18,2
+ bld r11,0
+ bst r18,3
+ bld r15,0
+ bst r18,4
+ bld r3,1
+ bst r18,5
+ bld r7,1
+ bst r18,6
+ bld r11,1
+ bst r18,7
+ bld r15,1
+ bst r19,0
+ bld r3,2
+ bst r19,1
+ bld r7,2
+ bst r19,2
+ bld r11,2
+ bst r19,3
+ bld r15,2
+ bst r19,4
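+/* The ldd/bst/bld runs in this prologue transpose the PHOTON-256 state
+   from byte order into bit-sliced form: bst copies one bit of a loaded
+   byte into the T flag, and the paired bld deposits it into the register
+   that collects that bit plane (r2-r15, r24, r25). */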
bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst 
r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + 
and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 + lsr r5 + ror r0 + lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + 
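+/* End of the bit-sliced S-box layer: the preceding and/or/eor/com
+   sequence evaluates the PHOTON S-box on the register slices in
+   parallel. The swap/lsl/adc run that follows rotates each row register
+   by its offset (the ShiftRows stage of the PHOTON round). */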
swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std 
Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd 
r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor 
r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + 
bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst 
r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256.c b/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256.c +++ b/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of 
rounds in the PHOTON-256 permutation in bit-sliced form. */ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256-avr.S b/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256-avr.S new file mode 100644 index 0000000..5826dd3 --- /dev/null +++ b/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256-avr.S @@ -0,0 +1,2583 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 + ldd r19,Z+29 +
ldd r20,Z+30 + ldd r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + 
bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 + lsr r5 + ror r0 
+ lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor 
r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor 
r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor 
r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor 
r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst 
r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 
+ bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256.c b/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256.c +++ b/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. 
*/ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256-avr.S b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256-avr.S new file mode 100644 index 0000000..5826dd3 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256-avr.S @@ -0,0 +1,2583 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28
+ ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld 
r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 
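+/* Rotation idiom: r1 is the avr-gcc zero register, so the preceding "mov r0,r1" clears r0; each lsr/ror pair that follows shifts the low bit of a plane register through carry into r0, and the final "or" merges the collected bits back, giving an 8-bit rotate right (three positions for the r5/r9/r13/r25 plane). */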
+ lsr r5 + ror r0 + lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor 
r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor 
r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + 
bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld 
r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256.c b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256.c +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. 
*/ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256-avr.S b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256-avr.S new file mode 100644 index 0000000..5826dd3 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256-avr.S @@ -0,0 +1,2583 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 +
ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld 
r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 
+ lsr r5 + ror r0 + lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor 
r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor 
r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + 
bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld 
r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256.c b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256.c +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. 
*/ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256-avr.S b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256-avr.S new file mode 100644 index 0000000..5826dd3 --- /dev/null +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256-avr.S @@ -0,0 +1,2583 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 +
ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld 
r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 
+ lsr r5 + ror r0 + lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor 
r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor 
r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + 
bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld 
r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256.c b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256.c +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. 
*/ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..495399b --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/encrypt.c @@ -0,0 +1,1337 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include <stdio.h> +#include <string.h> + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else {
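+ /* len8 == 16: a full block, copied as-is; no padding length byte is written */ +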
*(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
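+ /* the masked shifts above apply the byte-wise feedback function G: (x7,x6,...,x1,x0) -> (x0^x7, x7, x6, x5, x4, x3, x2, x1) */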
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 & 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = 
*(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + *(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = 
m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + 
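/* the shift/XOR below steps a 56-bit LFSR: when bit 55 (the MSB of CNT[6]) is set, the feedback byte 0x95 is folded into the low byte, i.e. the polynomial x^56 + x^7 + x^4 + x^2 + 1 */ +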
C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (mlen >= 16) { + len8 = 16; + mlen = mlen - 16; + rho_eqov16(*M, *c, s); + } + else { + len8 = mlen; + mlen = 0; + rho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + if (mlen != 0) { + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + } + return mlen; + +} + +unsigned long long msg_decryption ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (clen >= 16) { + len8 = 16; + clen = clen - 16; + irho_eqov16(*M, *c, s); + } + else { + len8 = clen; + clen = 0; + irho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + return clen; + +} + +unsigned long long ad2msg_encryption ( + const unsigned char** M, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (mlen <= 16) { + len8 = mlen; + mlen = 0; + } + else { + len8 = 16; + mlen = mlen - 16; + } + + pad (*M,T,len8); + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + *M = *M + len8; + + return mlen; + +} + +unsigned long long ad_encryption ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + + rho_ad_eqov16(*A, s); + } + else { + len8 = adlen; + adlen = 0; + rho_ad_ud16(*A, s, len8); + } + *A = *A + len8; + lfsr_gf56(CNT); + if (adlen != 0) { + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + } + else { + len8 = adlen; + adlen = 0; + } + pad(*A, T, len8); + *A = *A + len8; + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + } + + return adlen; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long 
long xlen; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + N = npub; + + xlen = mlen; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (mlen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + m = m - mlen; + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + *clen = mlen + 16; + + if (mlen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + while (mlen > 16) { + mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl); + } + rho_ud16(m, c, s, mlen); + c = c + mlen; + m = m + mlen; + } + + // Tag Concatenation + c[0] = T[0]; + c[1] = T[1]; + c[2] = T[2]; + c[3] = T[3]; + c[4] = T[4]; + c[5] = T[5]; + c[6] = T[6]; + c[7] = T[7]; + c[8] = T[8]; + c[9] = T[9]; + c[10] = T[10]; + c[11] = T[11]; + c[12] = T[12]; + c[13] = T[13]; + c[14] = T[14]; + c[15] = T[15]; + + c = c - *clen; + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + const unsigned char* mauth; + unsigned char* p1; + unsigned char* p2; 
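/*
 * [Editor's note] The long if/else ladders over adlen and xlen in
 * crypto_aead_encrypt above and crypto_aead_decrypt below both build the same
 * 8-bit domain separator w: it starts at 48, the outer ladder encodes how the
 * final AD double-block is padded (adlen mod 32), and the inner ladder --
 * repeated verbatim in all five branches -- encodes the same for the message
 * (xlen mod 32). A hypothetical helper with the identical truth table, shown
 * only to make the ladder readable:
 */
#if 0 /* illustrative sketch, not part of the patch */
static unsigned char final_block_bits(unsigned long long len) {
  if (len == 0)       return 1;  /* empty input */
  if (len % 32 == 0)  return 4;  /* ends on a full double block */
  if (len % 32 < 16)  return 1;  /* partial first half */
  if (len % 32 == 16) return 0;  /* exactly one full block */
  return 5;                      /* partial second half */
}

static unsigned char domain_byte(unsigned long long adlen,
                                 unsigned long long xlen) {
  unsigned char w = 48;
  if (adlen == 0)            w ^= 2;
  else if (adlen % 32 == 0)  w ^= 8;
  else if (adlen % 32 < 16)  w ^= 2;
  else if (adlen % 32 == 16) w ^= 0;
  else                       w ^= 10;
  return (unsigned char)(w ^ final_block_bits(xlen));
}
#endif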
+ + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + mauth = m; + + N = npub; + + xlen = clen-16; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (clen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + p1 = T; + p2 = (unsigned char*)&c[clen - 16]; + + p1[0] = p2[0]; + p1[1] = p2[1]; + p1[2] = p2[2]; + p1[3] = p2[3]; + p1[4] = p2[4]; + p1[5] = p2[5]; + p1[6] = p2[6]; + p1[7] = p2[7]; + p1[8] = p2[8]; + p1[9] = p2[9]; + p1[10] = p2[10]; + p1[11] = p2[11]; + p1[12] = p2[12]; + p1[13] = p2[13]; + p1[14] = p2[14]; + p1[15] = p2[15]; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + clen = clen - 16; + *mlen = clen; + + if (clen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + while (clen > 16) { + clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl); + } + irho_ud16(m, c, s, clen); + c = c + clen; + m = m + clen; + } + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny.h new file mode 100644 index 0000000..c8e7b56 --- /dev/null +++ 
b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +//#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include <stdint.h> + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, 
NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
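/*
 * [Editor's note] Each PERMUTATION_TK2() step permutes the eight TK2 cells
 * (see PERMUTATION() in skinny.h) and then applies the SKINNY TK2 LFSR
 * (x7 x6 .. x0) -> (x6 x5 .. x0, x7^x5) to every byte at once inside one
 * 64-bit word (SWAR). A hypothetical single-byte reference, useful only to
 * cross-check the packed masks used above:
 */
#if 0 /* illustrative sketch, not part of the patch */
static unsigned char tk2_lfsr_byte(unsigned char x) {
  /* bits 7..1 become x6..x0; bit 0 becomes x7 ^ x5 */
  return (unsigned char)(((x << 1) & 0xfe) | (((x >> 7) ^ (x >> 5)) & 0x01));
}
#endif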
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
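/*
 * [Editor's note] PERMUTATION_TK3() uses the inverse-direction SKINNY LFSR,
 * (x7 x6 .. x0) -> (x0^x6, x7, x6, x5, x4, x3, x2, x1), again packed into a
 * word, and XORs the round-constant halves into the stored round key: c0 into
 * byte 0 and c1 into byte 5 of the half-state (the "<< 40" and "<< 8" shifts
 * above). A hypothetical single-byte reference for the LFSR step:
 */
#if 0 /* illustrative sketch, not part of the patch */
static unsigned char tk3_lfsr_byte(unsigned char x) {
  /* bits 6..0 become x7..x1; bit 7 becomes x0 ^ x6 */
  return (unsigned char)(((x >> 1) & 0x7f) | (((x << 7) ^ (x << 1)) & 0x80));
}
#endif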
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 
0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = 
&roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ 
+ *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... 
,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..495399b --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c @@ -0,0 +1,1337 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include <stdint.h> +#include <string.h> + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + 
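/*
 * [Editor's note] pad() copies a partial block into mp, zero-fills the
 * remainder, and (for len8 < 16) records the byte count in mp[15]; the
 * word-mask branches above and below simply vectorize that byte copy.
 * A hypothetical byte-wise equivalent:
 */
#if 0 /* illustrative sketch, not part of the patch */
static void pad_bytewise(const unsigned char *m, unsigned char *mp, int len8) {
  for (int i = 0; i < 16; i++)
    mp[i] = (i < len8) ? m[i] : 0;  /* copy, then zero-fill */
  if (len8 < 16)
    mp[15] = (unsigned char)len8;   /* length marker in the last byte */
}
#endif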
*(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
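/*
 * [Editor's note] g8A() and g8A_for_Tag_Generation() both apply the rho
 * feedback function G to every state byte, (x7 x6 .. x1 x0) ->
 * (x0^x7, x7, x6, .., x2, x1), vectorized across 32- or 64-bit words; the
 * tag-generation variant stores byte by byte because, as its comment notes,
 * the output pointer may not be word-aligned. A per-byte reference (the same
 * expression the word versions apply lane-wise):
 */
#if 0 /* illustrative sketch, not part of the patch */
static unsigned char g8A_byte(unsigned char x) {
  return (unsigned char)(((x >> 1) & 0x7f) ^ ((x ^ (x << 7)) & 0x80));
}
#endif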
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 & 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = 
*(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + *(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = 
m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + 
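// Note (editor's gloss on lfsr_gf56, both variants): the 7-byte counter
// CNT[0..6] is stepped as one 56-bit LFSR, i.e. multiplication by x in
// GF(2^56); the feedback byte 0x95 (x^7 + x^4 + x^2 + 1) is XORed in
// whenever bit 55 (the top bit of CNT[6]) is set, which amounts to
// reduction modulo x^56 + x^7 + x^4 + x^2 + 1. CNT[7] is not part of
// the counter: block_cipher() below overwrites it with the
// domain-separation byte D. In this 32-bit variant the shift is split
// across two words, so bit 31 shifted out of C0 must be carried into C1.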
C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (mlen >= 16) { + len8 = 16; + mlen = mlen - 16; + rho_eqov16(*M, *c, s); + } + else { + len8 = mlen; + mlen = 0; + rho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + if (mlen != 0) { + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + } + return mlen; + +} + +unsigned long long msg_decryption ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (clen >= 16) { + len8 = 16; + clen = clen - 16; + irho_eqov16(*M, *c, s); + } + else { + len8 = clen; + clen = 0; + irho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + return clen; + +} + +unsigned long long ad2msg_encryption ( + const unsigned char** M, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (mlen <= 16) { + len8 = mlen; + mlen = 0; + } + else { + len8 = 16; + mlen = mlen - 16; + } + + pad (*M,T,len8); + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + *M = *M + len8; + + return mlen; + +} + +unsigned long long ad_encryption ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + + rho_ad_eqov16(*A, s); + } + else { + len8 = adlen; + adlen = 0; + rho_ad_ud16(*A, s, len8); + } + *A = *A + len8; + lfsr_gf56(CNT); + if (adlen != 0) { + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + } + else { + len8 = adlen; + adlen = 0; + } + pad(*A, T, len8); + *A = *A + len8; + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + } + + return adlen; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long 
long xlen; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + N = npub; + + xlen = mlen; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (mlen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + m = m - mlen; + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + *clen = mlen + 16; + + if (mlen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + while (mlen > 16) { + mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl); + } + rho_ud16(m, c, s, mlen); + c = c + mlen; + m = m + mlen; + } + + // Tag Concatenation + c[0] = T[0]; + c[1] = T[1]; + c[2] = T[2]; + c[3] = T[3]; + c[4] = T[4]; + c[5] = T[5]; + c[6] = T[6]; + c[7] = T[7]; + c[8] = T[8]; + c[9] = T[9]; + c[10] = T[10]; + c[11] = T[11]; + c[12] = T[12]; + c[13] = T[13]; + c[14] = T[14]; + c[15] = T[15]; + + c = c - *clen; + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + const unsigned char* mauth; + unsigned char* p1; + unsigned char* p2; 
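/*
 * The parameter-byte ladder below is byte-for-byte identical to the one
 * in crypto_aead_encrypt above: w starts at 48 (0x30) and the nested
 * branches fold the emptiness/alignment of the AD and of the message
 * into its low bits. Since the inner xlen ladder is the same in every
 * adlen branch, the construction is equivalent to two independent
 * ladders; an illustrative restatement (not part of the patch):
 *
 *     w = 48;
 *     if      (adlen == 0)        w ^= 2;
 *     else if (adlen % 32 == 0)   w ^= 8;
 *     else if (adlen % 32 < 16)   w ^= 2;
 *     else if (adlen % 32 == 16)  w ^= 0;
 *     else                        w ^= 10;
 *     if      (xlen == 0)         w ^= 1;
 *     else if (xlen % 32 == 0)    w ^= 4;
 *     else if (xlen % 32 < 16)    w ^= 1;
 *     else if (xlen % 32 == 16)   w ^= 0;
 *     else                        w ^= 5;
 */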
+ + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + mauth = m; + + N = npub; + + xlen = clen-16; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (clen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + p1 = T; + p2 = (unsigned char*)&c[clen - 16]; + + p1[0] = p2[0]; + p1[1] = p2[1]; + p1[2] = p2[2]; + p1[3] = p2[3]; + p1[4] = p2[4]; + p1[5] = p2[5]; + p1[6] = p2[6]; + p1[7] = p2[7]; + p1[8] = p2[8]; + p1[9] = p2[9]; + p1[10] = p2[10]; + p1[11] = p2[11]; + p1[12] = p2[12]; + p1[13] = p2[13]; + p1[14] = p2[14]; + p1[15] = p2[15]; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + clen = clen - 16; + *mlen = clen; + + if (clen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + while (clen > 16) { + clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl); + } + irho_ud16(m, c, s, clen); + c = c + clen; + m = m + clen; + } + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h new file mode 100644 index 0000000..826f2f8 --- /dev/null +++ 
b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC 
Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
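// The loop below covers the remaining odd rounds: 19 iterations for
// rounds 3,5,...,39 of the 40-round schedule, or 27 iterations for
// rounds 3,5,...,55 when ___NUM_OF_ROUNDS_56 is defined (round 1 was
// stored above; the even rounds are generated by the next loop).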
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
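// The RC table packs the two constant nibbles c0,c1 of one round into
// 2 bytes, and each loop iteration consumes 4 bytes (one round read,
// the next, opposite-parity round skipped). The odd loop above left
// pRC at byte offset 4 + 19*4 = 80 (or 4 + 27*4 = 112 for 56 rounds),
// so the rewind by 78 (or 110) bytes lands exactly on round 2's
// constants; the loop below then covers rounds 2,4,...,40 (or ...,56)
// in 20 (or 28) iterations.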
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 
0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = 
&roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ 
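  /* This store sequence finishes MixColumns with the usual       */ \
  /* post-MC word rotation absorbed into the destination order:   */ \
  /* row0^row2^row3 goes to word 0, row0 to word 1, row1^row2 to  */ \
  /* word 2 and row0^row2 to word 3, so no explicit rotate of the */ \
  /* state words is needed.                                       */ \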
+ *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... 
,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
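+ *
+ * The round constant is stepped backwards here via
+ * rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20), undoing the forward
+ * update used during encryption.  A minimal standalone sketch (hypothetical
+ * helper names, not code from this file) that checks the two updates are
+ * exact inverses over all 6-bit values:
+ *
+ * \code
+ * #include <assert.h>
+ * #include <stdint.h>
+ *
+ * static uint8_t rc_fwd(uint8_t rc)  // forward step from the encrypt path
+ * {
+ *     rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
+ *     return rc & 0x3F;
+ * }
+ *
+ * static uint8_t rc_inv(uint8_t rc)  // backward step from this macro
+ * {
+ *     return (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
+ * }
+ *
+ * int main(void)
+ * {
+ *     unsigned v;
+ *     for (v = 0; v < 64; ++v)
+ *         assert(rc_inv(rc_fwd((uint8_t)v)) == v);
+ *     return 0;
+ * }
+ * \endcode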
+ */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
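+ /* In the forward schedule each word of TK2 and TK3 is LFSR-stepped once
+  * every two rounds, so this loop applies SKINNY_128_384_ROUNDS / 2 steps
+  * to every word, fast-forwarding the tweakey to its end-of-encryption
+  * state before the inverse rounds unwind it. */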
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
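+ *
+ * The half permutation primitives from internal-skinnyutil.h update two
+ * rows in place without the half swap performed by the full PT permutation;
+ * the swap is realised implicitly by alternating the half argument from one
+ * round to the next.  A tiny hypothetical self-check (illustrative values,
+ * not code from this file) that the inverse macro undoes the forward one:
+ *
+ * \code
+ * uint32_t a = 0x03020100, b = 0x07060504;  // arbitrary test rows
+ * skinny128_permute_tk_half(a, b);
+ * skinny128_inv_permute_tk_half(a, b);
+ * // a == 0x03020100 and b == 0x07060504 hold again here
+ * \endcode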
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
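+ /* The inverse rounds below mirror encryption exactly: they run with
+  * half = 1,0,1,0 and schedule offsets 3,2,1,0, and the schedule pointer
+  * starts at k[SKINNY_128_256_ROUNDS * 2 - 8] so that offset 3 picks up
+  * the subkey words of the final round first. */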
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
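 * (Each iteration of the rewritten loop emits the subkey words for two
 * rounds: schedule[0..1] for the even round from TK2[0..1]/TK3[0..1], and
 * schedule[2..3] for the odd round from TK2[2..3]/TK3[2..3], permuting only
 * the half of the tweakey that the following round consumes.)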
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
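+ *
+ * The word rotation of the inverse MixColumns is not performed inside the
+ * macro.  The caller renames the state words instead, invoking the four
+ * unrolled rounds as (s0,s1,s2,s3), (s1,s2,s3,s0), (s2,s3,s0,s1) and
+ * (s3,s0,s1,s2), so the rotation costs no data movement at run time.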
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
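+ /* In the inverse rounds the byte LFSRs swap roles: a TK2 byte is stepped
+  * backwards with skinny128_LFSR3 and a TK3 byte with skinny128_LFSR2,
+  * because the TK2 and TK3 LFSRs of SKINNY are inverses of each other. */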
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
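+ *
+ * The trailing word rotation of MixColumns is folded into the call sites,
+ * which invoke the rounds as (s0,s1,s2,s3), (s3,s0,s1,s2), (s2,s3,s0,s1),
+ * (s1,s2,s3,s0).  A sketch of the equivalence (illustrative only):
+ *
+ * \code
+ * // old form, rotating the words through a temporary:
+ * s1 ^= s2; s2 ^= s0; temp = s3 ^ s2;
+ * s3 = s2;  s2 = s1;  s1 = s0;  s0 = temp;
+ * // unrolled form: same mixing, no rotation...
+ * s1 ^= s2; s2 ^= s0; s3 ^= s2;
+ * // ...because the next round is simply entered as (s3, s0, s1, s2).
+ * \endcode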
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
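+ /* The seed rc = 0x09 above is the round-constant LFSR state one step past
+  * round 48: the macros step rc backwards before using it, so the first
+  * inverse round sees the constant of the final forward round.  The 384-bit
+  * variant starts from 0x15, one step past round 56, for the same reason. */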
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
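 * (The LFSR steps below act on all four bytes of a 32-bit word at once.
 * As a hypothetical per-byte model - these helpers are not code from this
 * file - the standard SKINNY updates are:
 *     lfsr2(x) = ((x << 1) & 0xFE) | (((x >> 7) ^ (x >> 5)) & 0x01)  // TK2
 *     lfsr3(x) = ((x >> 1) & 0x7F) | (((x << 7) ^ (x << 1)) & 0x80)  // TK3
 * each acting on a single tweakey byte x.)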
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
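+ *
+ * The word rotation of the inverse mix-columns is folded into the
+ * argument order: the callers pass the state words rotated one
+ * position per call and walk the key schedule backwards, as in
+ * the decryption loop below:
+ *
+ *     skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+ *     skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);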
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSRs on every byte of TK2 and TK3 */
 skinny128_LFSR2(TK2[0]);
 skinny128_LFSR2(TK2[1]);
 skinny128_LFSR2(TK2[2]);
@@ -263,50 +420,20 @@ void skinny_128_384_decrypt
 }
 #endif
- /* Perform all decryption rounds */
- for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) {
- /* Inverse permutation on TK1 for this round */
- skinny128_inv_permute_tk(TK1);
+ /* Perform all decryption rounds four at a time */
+ for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
- skinny128_inv_permute_tk(TK2);
- skinny128_inv_permute_tk(TK3);
- skinny128_LFSR3(TK2[2]);
- skinny128_LFSR3(TK2[3]);
- skinny128_LFSR2(TK3[2]);
- skinny128_LFSR2(TK3[3]);
-#endif
-
- /* Inverse mix of the columns */
- temp = s3;
- s3 = s0;
- s0 = s1;
- s1 = s2;
- s3 ^= temp;
- s2 = temp ^ s0;
- s1 ^= s2;
-
- /* Inverse shift of the rows */
- s1 = leftRotate24(s1);
- s2 = leftRotate16(s2);
- s3 = leftRotate8(s3);
-
- /* Apply the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
- rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
- s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F);
- s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4);
+ skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1);
+ skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0);
+ skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1);
+ skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0);
 #else
- s0 ^= schedule[0] ^ TK1[0];
- s1 ^= schedule[1] ^ TK1[1];
- schedule -= 2;
+ skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+ skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+ skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+ skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+ schedule -= 8;
 #endif
- s2 ^= 0x02;
-
- /* Apply the inverse of the S-box to all bytes in the state */
- skinny128_inv_sbox(s0);
- skinny128_inv_sbox(s1);
- skinny128_inv_sbox(s2);
- skinny128_inv_sbox(s3);
 }
 /* Pack the result into the output buffer */
@@ -316,6 +443,57 @@ void skinny_128_384_decrypt
 le_store_word32(output + 12, s3);
 }
+/**
+ * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset)
+ * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2().
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state.
+ * \param s2 Third word of the state.
+ * \param s3 Fourth word of the state.
+ * \param half 0 for the bottom half and 1 for the top half of the TK values.
+ * \param offset Offset between 0 and 3 of the current unrolled round.
+ */
+#if SKINNY_128_SMALL_SCHEDULE
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+ skinny_128_384_round_tk_full(s0, s1, s2, s3, half)
+#else /* !SKINNY_128_SMALL_SCHEDULE */
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+ do { \
+ /* Apply the S-box to all bytes in the state */ \
+ skinny128_sbox(s0); \
+ skinny128_sbox(s1); \
+ skinny128_sbox(s2); \
+ skinny128_sbox(s3); \
+ \
+ /* XOR the round constant and the subkey for this round */ \
+ s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \
+ s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \
+ TK2[half * 2 + 1]; \
+ s2 ^= 0x02; \
+ \
+ /* Shift the cells in the rows right, which moves the cell \
+ * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
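+ *
+ * A block of four rounds is four calls with the state words rotated
+ * one position each time, alternating the bottom and top tweakey
+ * halves, exactly as in skinny_128_256_encrypt():
+ *
+ *     skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+ *     skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+ *     skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+ *     skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);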
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+ do { \
+ /* Apply the S-box to all bytes in the state */ \
+ skinny128_sbox(s0); \
+ skinny128_sbox(s1); \
+ skinny128_sbox(s2); \
+ skinny128_sbox(s3); \
+ \
+ /* XOR the round constant and the subkey for this round */ \
+ rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+ rc &= 0x3F; \
+ s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+ s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+ s2 ^= 0x02; \
+ \
+ /* Shift the cells in the rows right, which moves the cell \
+ * values up closer to the MSB. That is, we do a left rotate \
+ * on the word to rotate the cells in the word right */ \
+ s1 = leftRotate8(s1); \
+ s2 = leftRotate16(s2); \
+ s3 = leftRotate24(s3); \
+ \
+ /* Mix the columns, but don't rotate the words yet */ \
+ s1 ^= s2; \
+ s2 ^= s0; \
+ s3 ^= s2; \
+ \
+ /* Permute TK1 and TK2 in-place for the next round */ \
+ skinny128_permute_tk_half \
+ (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+ skinny128_permute_tk_half \
+ (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+ skinny128_LFSR2(TK2[(1 - half) * 2]); \
+ skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+ } while (0)
+
 void skinny_128_256_encrypt
 (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
 const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
 const uint32_t *schedule = ks->k;
 #endif
- uint32_t temp;
 unsigned round;
 /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
 TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
- /* Perform all encryption rounds */
- for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
- /* Apply the S-box to all bytes in the state */
- skinny128_sbox(s0);
- skinny128_sbox(s1);
- skinny128_sbox(s2);
- skinny128_sbox(s3);
-
- /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
- rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
- rc &= 0x3F;
- s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
- s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
- s0 ^= schedule[0] ^ TK1[0];
- s1 ^= schedule[1] ^ TK1[1];
-#endif
- s2 ^= 0x02;
-
- /* Shift the cells in the rows right, which moves the cell
- * values up closer to the MSB. That is, we do a left rotate
- * on the word to rotate the cells in the word right */
- s1 = leftRotate8(s1);
- s2 = leftRotate16(s2);
- s3 = leftRotate24(s3);
-
- /* Mix the columns */
- s1 ^= s2;
- s2 ^= s0;
- temp = s3 ^ s2;
- s3 = s2;
- s2 = s1;
- s1 = s0;
- s0 = temp;
-
- /* Permute TK1 and TK2 for the next round */
- skinny128_permute_tk(TK1);
+ /* Perform all encryption rounds four at a time */
+ for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
- skinny128_permute_tk(TK2);
- skinny128_LFSR2(TK2[0]);
- skinny128_LFSR2(TK2[1]);
+ skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+ skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+ skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+ skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
- schedule += 2;
+ skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+ skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+ skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+ skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+ schedule += 8;
 #endif
 }
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
 le_store_word32(output + 12, s3);
 }
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..f329721 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c @@ -0,0 +1,1136 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + 
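/* A full 16-byte block is copied verbatim: no padding is applied and
+ * no length byte is written into mp[15]. */
+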
*(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
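+ // Byte-at-a-time stores: c need not be 4-byte aligned here, and the
+ // byte order below matches the little-endian word stores used in g8A().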
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 & 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = 
*(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + *(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = 
m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + 
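/* Doubling in GF(2^56): the 56-bit counter shifts left by one bit and,
+ * when its top bit (bit 55, the MSB of CNT[6]) is set before the shift,
+ * the feedback 0x95 (x^7 + x^4 + x^2 + 1) is XORed into the low byte. */
+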
C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption_eqov16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return mlen - 16; + +} + +unsigned long long msg_encryption_ud16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + +// char msg[64]; +// +// unsigned int st = (unsigned int )read_cycle(); + + rho_ud16(*M, *c, s, mlen); + +// unsigned int ed = (unsigned int )read_cycle(); +// sprintf(msg, "rho_ud16 %d\n", ed-st); +// SerialPuts(msg); +// +// fprint_bstr(NULL, "c = ", *c, 16); + + *c = *c + mlen; + *M = *M + mlen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long msg_decryption_eqov16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + + return clen - 16; + +} + +unsigned long long msg_decryption_ud16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_ud16(*M, *c, s, clen); + *c = *c + clen; + *M = *M + clen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long ad_encryption_eqov32 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&T[0]) = *(uint64_t*)(&(*A)[0]); + *(uint64_t*)(&T[8]) = *(uint64_t*)(&(*A)[8]); + +#else + + *(uint32_t*)(&T[0]) = *(uint32_t*)(&(*A)[0]); + *(uint32_t*)(&T[4]) = *(uint32_t*)(&(*A)[4]); + *(uint32_t*)(&T[8]) = *(uint32_t*)(&(*A)[8]); + *(uint32_t*)(&T[12]) = *(uint32_t*)(&(*A)[12]); + +#endif + + *A = *A + 16; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return adlen - 32; + +} + +unsigned long long ad_encryption_ov16 
( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + adlen = adlen - 16; + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + pad(*A, T, adlen); + *A = *A + adlen; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_eq16 ( + const unsigned char** A, unsigned char* s, + unsigned char* CNT) { + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_ud16( + const unsigned char** A, unsigned char* s, + unsigned long long adlen, + unsigned char* CNT) { + + rho_ad_ud16(*A, s, adlen); + *A = *A + adlen; + lfsr_gf56(CNT); + + return 0; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + const unsigned char* A; + const unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + *clen = mlen + 16; + + if (mlen == 0) { // M is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (mlen > 0) { + if (mlen < 16) { // The last block of M is incomplete + mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl); + } + else if (mlen == 16) { // The last block of M is complete + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl); + } + else { // A normal full message block + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl); + } + } + + // Tag generation + generate_tag(&c,s,clen); + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char T[16]; + unsigned char CNT[8]; + const unsigned char* A; + unsigned char* M; 
+ const unsigned char* N;
+
+ skinny_ctrl ctrl;
+ ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
+
+ (void) nsec;
+ A = ad;
+ M = m;
+ N = npub;
+
+#ifdef ___ENABLE_DWORD_CAST
+
+ *(uint64_t*)(&s[0]) = 0;
+ *(uint64_t*)(&s[8]) = 0;
+
+#else
+
+ *(uint32_t*)(&s[0]) = 0;
+ *(uint32_t*)(&s[4]) = 0;
+ *(uint32_t*)(&s[8]) = 0;
+ *(uint32_t*)(&s[12]) = 0;
+
+#endif
+
+ reset_lfsr_gf56(CNT);
+
+ if (adlen == 0) { // AD is an empty string
+ lfsr_gf56(CNT);
+ nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
+ }
+ else while (adlen > 0) {
+ if (adlen < 16) { // The last block of AD is odd and incomplete
+ adlen = ad_encryption_ud16(&A,s,adlen,CNT);
+ nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
+ }
+ else if (adlen == 16) { // The last block of AD is odd and complete
+ adlen = ad_encryption_eq16(&A,s,CNT);
+ nonce_encryption(N,CNT,s,k,0x18,&ctrl);
+ }
+ else if (adlen < 32) { // The last block of AD is even and incomplete
+ adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl);
+ nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
+ }
+ else if (adlen == 32) { // The last block of AD is even and complete
+ adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
+ nonce_encryption(N,CNT,s,k,0x18,&ctrl);
+ }
+ else { // A normal full pair of blocks of AD
+ adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
+ }
+ }
+
+ ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
+
+ reset_lfsr_gf56(CNT);
+
+ clen = clen - 16;
+ *mlen = clen;
+
+ if (clen == 0) { // C is an empty string
+ lfsr_gf56(CNT);
+ nonce_encryption(N,CNT,s,k,0x15,&ctrl);
+ }
+ else while (clen > 0) {
+ if (clen < 16) { // The last block of C is incomplete
+ clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl);
+ }
+ else if (clen == 16) { // The last block of C is complete
+ clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl);
+ }
+ else { // A normal full message block
+ clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl);
+ }
+ }
+
+ // Tag generation
+ g8A_for_Tag_Generation(s, T);
+
+ // Verify the tag (byte-wise compare with an early exit, so not constant time)
+ for (int i = 0; i < 16; i++) {
+ if (T[i] != (*(c+i))) {
+ return -1;
+ }
+ }
+
+ return 0;
+
+}
diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h
new file mode 100644
index 0000000..c8e7b56
--- /dev/null
+++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h
@@ -0,0 +1,106 @@
+#define ___SKINNY_LOOP
+//#define ___NUM_OF_ROUNDS_56
+#if (defined(__riscv_xlen) && (__riscv_xlen == 64))
+#define ___ENABLE_DWORD_CAST
+#endif
+
+#include <stdint.h>
+
+typedef struct ___skinny_ctrl {
+#ifdef ___NUM_OF_ROUNDS_56
+ unsigned char roundKeys[960]; // number of rounds : 56
+#else
+ unsigned char roundKeys[704]; // number of rounds : 40
+#endif
+ void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
+} skinny_ctrl;
+
+extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
+extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
+extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
+
+#define pack_word(x0, x1, x2, x3, w) \
+ w = ((x3) << 24) ^ \
+ ((x2) << 16) ^ \
+ ((x1) << 8) ^ \
+ (x0);
+
+#define unpack_word(x0, x1, x2, x3, w) \
+ x0 = ((w) & 0xff); \
+ x1 = (((w) >> 8) & 0xff);
\ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 
0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = 
&roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ 
+ *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... 
,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..f329721 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c @@ -0,0 +1,1136 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + 
*(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 & 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = 
*(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + *(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = 
m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + 
C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption_eqov16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return mlen - 16; + +} + +unsigned long long msg_encryption_ud16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + +// char msg[64]; +// +// unsigned int st = (unsigned int )read_cycle(); + + rho_ud16(*M, *c, s, mlen); + +// unsigned int ed = (unsigned int )read_cycle(); +// sprintf(msg, "rho_ud16 %d\n", ed-st); +// SerialPuts(msg); +// +// fprint_bstr(NULL, "c = ", *c, 16); + + *c = *c + mlen; + *M = *M + mlen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long msg_decryption_eqov16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + + return clen - 16; + +} + +unsigned long long msg_decryption_ud16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_ud16(*M, *c, s, clen); + *c = *c + clen; + *M = *M + clen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long ad_encryption_eqov32 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&T[0]) = *(uint64_t*)(&(*A)[0]); + *(uint64_t*)(&T[8]) = *(uint64_t*)(&(*A)[8]); + +#else + + *(uint32_t*)(&T[0]) = *(uint32_t*)(&(*A)[0]); + *(uint32_t*)(&T[4]) = *(uint32_t*)(&(*A)[4]); + *(uint32_t*)(&T[8]) = *(uint32_t*)(&(*A)[8]); + *(uint32_t*)(&T[12]) = *(uint32_t*)(&(*A)[12]); + +#endif + + *A = *A + 16; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return adlen - 32; + +} + +unsigned long long ad_encryption_ov16 
( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + adlen = adlen - 16; + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + pad(*A, T, adlen); + *A = *A + adlen; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_eq16 ( + const unsigned char** A, unsigned char* s, + unsigned char* CNT) { + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_ud16( + const unsigned char** A, unsigned char* s, + unsigned long long adlen, + unsigned char* CNT) { + + rho_ad_ud16(*A, s, adlen); + *A = *A + adlen; + lfsr_gf56(CNT); + + return 0; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + const unsigned char* A; + const unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + *clen = mlen + 16; + + if (mlen == 0) { // M is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (mlen > 0) { + if (mlen < 16) { // The last block of M is incomplete + mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl); + } + else if (mlen == 16) { // The last block of M is complete + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl); + } + else { // A normal full message block + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl); + } + } + + // Tag generation + generate_tag(&c,s,clen); + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char T[16]; + unsigned char CNT[8]; + const unsigned char* A; + unsigned char* M; 
+ const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + clen = clen - 16; + *mlen = clen; + + if (clen == 0) { // C is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (clen > 0) { + if (clen < 16) { // The last block of C is incomplete + clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl); + } + else if (clen == 16) { // The last block of C is complete + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl); + } + else { // A normal full message block + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl); + } + } + + // Tag verification (note: byte-wise compare with early exit is not constant time) + g8A_for_Tag_Generation(s, T); + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h new file mode 100644 index 0000000..826f2f8 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include <stdint.h> + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + 
x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 ^ TK3 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st,43rd, ... ,53rd,55th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
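+ // (19 iterations cover the remaining odd rounds 3-39 of the 40-round schedule; 27 cover rounds 3-55 of the 56-round schedule)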
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st,43rd, ... ,53rd,55th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st,43rd, ... ,53rd,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
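+ // (20 iterations cover the even rounds 2-40 of the 40-round schedule; 28 cover rounds 2-56 of the 56-round schedule)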
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st,43rd, ... ,53rd,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 
0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +/* The TK1 byte permutation has period 16 rounds, so eight successive TK1 states are precomputed here and reused by Encrypt(). */ +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = 
&roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ 
+ *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32nd round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33rd, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32nd or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33rd, ... ,40th or 49th, ... ,56th round
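+ // TK1 repeats with period 16 rounds, so tk1 is rewound at the top of each block of eight double-round SKINNY_MAIN() calls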
+ { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32nd round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33rd, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... 
,32nd or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33rd, ... ,40th or 49th, ... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
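+ * \note TK2 is stepped with skinny128_LFSR3 and TK3 with skinny128_LFSR2 below; the two LFSRs are mutual inverses, so this walks the tweakey schedule backwards.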
+ */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
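A note on the round-constant handling in the decrypt paths above: decryption seeds rc with a constant (0x09 for Skinny-128-256, 0x15 for Skinny-128-384) instead of replaying the forward sequence, and each inverse round applies rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20), which undoes the forward update. Those seeds are simply the forward 6-bit LFSR clocked ROUNDS + 1 times from zero; the extra step exists because the inverse round steps rc back *before* using it. A minimal standalone sketch checking both facts, assuming the round counts 48 and 56 from the SKINNY specification (the helper names rc_fwd and rc_inv are local to the sketch, not part of the library):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Forward round-constant update used by the encryption rounds */
static uint8_t rc_fwd(uint8_t rc)
{
    rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
    return rc & 0x3F;
}

/* Inverse update used by the *_inv_round_tk_full macros */
static uint8_t rc_inv(uint8_t rc)
{
    return (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
}

int main(void)
{
    uint8_t rc = 0;
    unsigned i;

    /* rc_inv really is the inverse of rc_fwd on all 64 possible states */
    for (i = 0; i < 64; ++i)
        assert(rc_inv(rc_fwd((uint8_t)i)) == i);

    /* Clock forward ROUNDS + 1 times: the last encryption round consumes
     * the state after ROUNDS updates, and the first inverse round rewinds
     * once before reading, so the decrypt seed sits one state further on. */
    for (i = 0; i < 48 + 1; ++i)      /* SKINNY_128_256_ROUNDS == 48 */
        rc = rc_fwd(rc);
    assert(rc == 0x09);               /* seed in skinny_128_256_decrypt() */

    for (; i < 56 + 1; ++i)           /* SKINNY_128_384_ROUNDS == 56 */
        rc = rc_fwd(rc);
    assert(rc == 0x15);               /* seed in skinny_128_384_decrypt() */

    printf("round-constant seeds check out\n");
    return 0;
}
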
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
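The unrolled macros above drop the temp-based word rotation at the end of MixColumns and instead rotate the *argument order* across four consecutive calls: (s0,s1,s2,s3), (s3,s0,s1,s2), (s2,s3,s0,s1), (s1,s2,s3,s0) in the forward direction, and the opposite rotation for the inverse rounds. After four calls every word is back in its home variable, which is why all the loops now advance round += 4. A minimal sketch of just the linear layer (ShiftRows plus MixColumns, no S-box or subkeys) showing the two forms agree; leftRotateN and the helper names linear_rotating / LINEAR_RENAMED are written out locally so the snippet stands alone:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define leftRotate8(x)  (((x) << 8)  | ((x) >> 24))
#define leftRotate16(x) (((x) << 16) | ((x) >> 16))
#define leftRotate24(x) (((x) << 24) | ((x) >> 8))

/* Reference form: ShiftRows + MixColumns with the explicit word rotation */
static void linear_rotating(uint32_t s[4])
{
    uint32_t temp;
    s[1] = leftRotate8(s[1]);
    s[2] = leftRotate16(s[2]);
    s[3] = leftRotate24(s[3]);
    s[1] ^= s[2];
    s[2] ^= s[0];
    temp = s[3] ^ s[2];
    s[3] = s[2];
    s[2] = s[1];
    s[1] = s[0];
    s[0] = temp;
}

/* Unrolled form: same mix, but the words stay where they are */
#define LINEAR_RENAMED(s0, s1, s2, s3) \
    do { \
        s1 = leftRotate8(s1); \
        s2 = leftRotate16(s2); \
        s3 = leftRotate24(s3); \
        s1 ^= s2; \
        s2 ^= s0; \
        s3 ^= s2; \
    } while (0)

int main(void)
{
    uint32_t s[4] = {0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C};
    uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
    int i;

    for (i = 0; i < 4; ++i)
        linear_rotating(s);

    /* Rotating the argument order substitutes for rotating the words;
     * after four rounds everything is back in its natural register. */
    LINEAR_RENAMED(a, b, c, d);
    LINEAR_RENAMED(d, a, b, c);
    LINEAR_RENAMED(c, d, a, b);
    LINEAR_RENAMED(b, c, d, a);

    assert(a == s[0] && b == s[1] && c == s[2] && d == s[3]);
    printf("renamed unrolling matches the rotating form\n");
    return 0;
}

The payoff is that a compiler targeting a register-poor machine never has to materialise the five-way word shuffle; each unrolled round is pure XOR-and-rotate on values that never move.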
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
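The half-permutation macros used throughout these rounds work because of a structural property of the SKINNY tweakey permutation PT: it moves the current top half of the tweakey array into the bottom half *unchanged*, so PT squared never crosses halves and applies the same 8-cell shuffle to each half. The unrolled code exploits this by reading subkeys from alternating halves (the half argument) and shuffling only one half per round, instead of physically swapping words; it is also why each half is LFSR-clocked only every other round, so the decrypt fast-forward loops apply the LFSR ROUNDS/2 times per byte. A spec-level check of that structure, with PT copied from the SKINNY specification (it is consistent with the inverse permutation PT' quoted in the comments above):

#include <assert.h>
#include <stdio.h>

/* Tweakey cell permutation from the SKINNY specification:
 * cell i of the next round comes from cell PT[i] of this round. */
static const unsigned char PT[16] = {
    9, 15, 8, 13, 10, 14, 12, 11,
    0, 1,  2, 3,  4,  5,  6,  7
};

int main(void)
{
    unsigned i;

    for (i = 0; i < 8; ++i) {
        /* Bottom half of the new tweakey is the old top half, unchanged */
        assert(PT[i + 8] == i);
        /* Hence PT^2 fixes each half as a set: the top half is shuffled
         * by sigma(i) = PT[i] - 8, and the bottom half by the same sigma. */
        assert(PT[PT[i]] == PT[i] - 8);
        assert(PT[PT[i + 8]] == (PT[i] - 8) + 8);
    }

    /* sigma is the intra-half shuffle that skinny128_permute_tk_half
     * implements at the word level */
    printf("sigma:");
    for (i = 0; i < 8; ++i)
        printf(" %u", (unsigned)(PT[i] - 8));
    printf("\n");   /* prints: sigma: 1 7 0 5 2 6 4 3 */
    return 0;
}
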
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
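With the loops unrolled four-fold, each iteration consumes four subkeys of two 32-bit words each, which is why encryption now advances schedule += 8 and decryption starts at ks->k[ROUNDS * 2 - 8] and rewinds by 8, indexing the four rounds inside each window with offset running 3 down to 0. A quick sanity check that this walk visits every schedule word exactly once and in reverse subkey order; the harness names ROUNDS, seen, and next_pair are local to the sketch (56 is the SKINNY-128-384 round count, but any multiple of 4 works):

#include <assert.h>
#include <stdio.h>

#define ROUNDS 56

int main(void)
{
    int seen[ROUNDS * 2] = {0};
    int base = ROUNDS * 2 - 8;   /* initial decrypt value of `schedule` */
    int next_pair = ROUNDS - 1;  /* subkey pairs must appear in reverse */
    int round, offset, w;

    for (round = 0; round < ROUNDS; round += 4) {
        for (offset = 3; offset >= 0; --offset) {
            /* the inv-round macros read schedule[offset*2] and [offset*2+1] */
            w = base + offset * 2;
            assert(w / 2 == next_pair);
            next_pair--;
            seen[w]++;
            seen[w + 1]++;
        }
        base -= 8;               /* schedule -= 8 after each iteration */
    }

    for (round = 0; round < ROUNDS * 2; ++round)
        assert(seen[round] == 1);
    printf("decryption visits all %d schedule words once, in reverse\n",
           ROUNDS * 2);
    return 0;
}
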
+ /* Also fast-forward the LFSRs on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
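The rc computation in skinny_128_256_init above, and in the round macro that follows, is the 6-bit round-constant LFSR from the SKINNY specification; the low nibble is folded into the first schedule word and the high two bits into the second. A standalone sketch, not part of the patch, that prints the first few constants as they would be consumed (the first outputs are 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3E, matching the published SKINNY round constants):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t rc = 0;
        int round;
        for (round = 0; round < 8; ++round) {
            /* Same update as the key schedule: 6-bit LFSR, then mask. */
            rc = (uint8_t)((rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01);
            rc &= 0x3F;
            printf("round %d: rc=0x%02X low=0x%X high=0x%X\n",
                   round, (unsigned)rc, (unsigned)(rc & 0x0F), (unsigned)(rc >> 4));
        }
        return 0;
    }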
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
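The inverse macros above step the round constant backwards with rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20): the low five bits shift back down, and bit 5 is re-derived from the feedback taps. A standalone round-trip check over all 64 six-bit states, not part of the patch (rc_fwd/rc_inv are illustrative names):

    #include <assert.h>
    #include <stdint.h>

    /* Forward update, as in the encryption rounds and key schedule. */
    static uint8_t rc_fwd(uint8_t rc)
    {
        rc = (uint8_t)((rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01);
        return rc & 0x3F;
    }

    /* Backward update, as in the inverse rounds above. */
    static uint8_t rc_inv(uint8_t rc)
    {
        return (uint8_t)((rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20));
    }

    int main(void)
    {
        unsigned rc;
        for (rc = 0; rc < 64; ++rc)
            assert(rc_inv(rc_fwd((uint8_t)rc)) == rc);
        return 0;
    }

This is what lets skinny_128_256_decrypt and skinny_128_384_decrypt seed rc with a fixed end-of-schedule value and replay the constants in reverse, instead of storing them.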
+ /* Also fast-forward the LFSRs on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB.
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin-avr.S b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin-avr.S new file mode 100644 index 0000000..f20ce72 --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin-avr.S @@ -0,0 +1,6365 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 384 +table_0: + .byte 194 + .byte 38 + .byte 176 + .byte 78 + .byte 3 + .byte 83 + .byte 89 + .byte 144 + .byte 50 + .byte 230 + .byte 143 + .byte 170 + .byte 146 + .byte 138 + .byte 146 + .byte 254 + .byte 25 + .byte 164 + .byte 21 + .byte 65 + .byte 50 + .byte 149 + .byte 83 + .byte 147 + .byte 78 + .byte 204 + .byte 177 + .byte 93 + .byte 202 + .byte 21 + .byte 21 + .byte 84 + .byte 168 + .byte 85 + .byte 31 + .byte 189 + .byte 13 + .byte 26 + .byte 110 + .byte 90 + .byte 181 + .byte 38 + .byte 69 + .byte 78 + .byte 240 + .byte 95 + .byte 86 + .byte 163 + .byte 216 + .byte 32 + .byte 143 + .byte 15 + .byte 225 + .byte 190 + .byte 84 + .byte 11 + .byte 157 + .byte 108 + .byte 26 + .byte 125 + .byte 10 + .byte 40 + .byte 166 + .byte 23 + .byte 134 + .byte 201 + .byte 70 + .byte 170 + .byte 98 + .byte 144 + .byte 25 + .byte 193 + .byte 222 + .byte 92 + .byte 44 + .byte 24 + .byte 254 + .byte 83 + .byte 13 + .byte 160 + .byte 152 + .byte 38 + .byte 22 + .byte 78 + .byte 161 + .byte 91 + .byte 83 + .byte 178 + .byte 101 + .byte 157 + .byte 143 + .byte 108 + .byte 48 + .byte 173 + .byte 22 + .byte 88 + .byte 250 + .byte 212 + .byte 31 + .byte 105 + .byte 249 + .byte 188 + .byte 245 + .byte 107 + .byte 37 + .byte 53 + .byte 235 + .byte 248 + .byte 250 + .byte 236 + .byte 29 + .byte 178 + .byte 23 + .byte 164 + .byte 61 + .byte 123 + .byte 180 + .byte 148 + .byte 44 + .byte 246 + .byte 91 + .byte 38 + .byte 175 + .byte 79 + .byte 22 + .byte 70 + .byte 72 + .byte 197 + .byte 33 + .byte 173 + .byte 220 + .byte 69 + .byte 7 + .byte 214 + .byte 139 + .byte 224 + .byte 184 + .byte 253 + .byte 4 + .byte 5 + .byte 87 + .byte 82 + .byte 31 + .byte 30 + .byte 22 + .byte 194 + .byte 251 + .byte 69 + .byte 31 + .byte 155 + .byte 82 + .byte 235 + .byte 50 + .byte 78 + .byte 25 + .byte 82 + .byte 24 + .byte 192 + .byte 152 + .byte 84 + .byte 118 + .byte 38 + .byte 252 + .byte 79 + .byte 71 + .byte 66 + .byte 77 + .byte 212 + .byte 156 + .byte 16 + .byte 220 + .byte 38 + .byte 214 + .byte 197 + .byte 201 + .byte 179 + .byte 223 + .byte 69 + .byte 1 + .byte 17 + .byte 164 + .byte 198 + .byte 76 + .byte 98 + .byte 181 + .byte 62 + .byte 86 + .byte 23 + .byte 135 + .byte 231 + .byte 86 + .byte 152 + .byte 251 + .byte 182 + .byte 8 + .byte 49 + .byte 82 + .byte 7 + .byte 185 + .byte 2 + .byte 1 + .byte 38 + .byte 9 + .byte 79 + .byte 180 + .byte 78 + .byte 66 + .byte 231 + .byte 118 + .byte 214 + .byte 220 + .byte 131 + .byte 165 + .byte 241 + .byte 15 + .byte 70 + .byte 91 + .byte 141 + .byte 14 + .byte 45 + .byte 156 + .byte 123 + .byte 185 + .byte 230 + .byte 125 + .byte 59 + .byte 161 + .byte 224 + .byte 47 + .byte 98 + .byte 90 + .byte 13 + .byte 141 + .byte 191 + .byte 59 + .byte 148 + .byte 161 + .byte 78 + .byte 218 + .byte 248 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 12 + .byte 24 + .byte 186 + 
.byte 63 + .byte 171 + .byte 185 + .byte 58 + .byte 86 + .byte 239 + .byte 165 + .byte 94 + .byte 18 + .byte 108 + .byte 162 + .byte 157 + .byte 133 + .byte 155 + .byte 119 + .byte 207 + .byte 184 + .byte 147 + .byte 231 + .byte 77 + .byte 125 + .byte 159 + .byte 180 + .byte 239 + .byte 7 + .byte 6 + .byte 83 + .byte 82 + .byte 141 + .byte 171 + .byte 230 + .byte 8 + .byte 30 + .byte 135 + .byte 159 + .byte 114 + .byte 65 + .byte 10 + .byte 239 + .byte 74 + .byte 140 + .byte 167 + .byte 201 + .byte 160 + .byte 74 + .byte 239 + .byte 149 + .byte 58 + .byte 217 + .byte 175 + .byte 210 + .byte 0 + .byte 187 + .byte 240 + .byte 91 + .byte 44 + .byte 182 + .byte 216 + .byte 148 + .byte 109 + .byte 56 + .byte 167 + .byte 25 + .byte 155 + .byte 60 + .byte 148 + .byte 134 + .byte 9 + .byte 169 + .byte 218 + .byte 120 + .byte 248 + .byte 35 + .byte 211 + .byte 71 + .byte 182 + .byte 167 + .byte 120 + .byte 157 + .byte 252 + .byte 116 + .byte 17 + .byte 174 + .byte 202 + .byte 234 + .byte 119 + .byte 166 + .byte 49 + .byte 47 + .byte 84 + .byte 192 + .byte 200 + .byte 76 + .byte 5 + .byte 202 + .byte 81 + .byte 47 + .byte 149 + .byte 241 + .byte 104 + .byte 82 + .byte 43 + .byte 138 + .byte 91 + .byte 79 + .byte 172 + .byte 180 + .byte 20 + .byte 246 + .byte 1 + .byte 84 + .byte 217 + .byte 241 + .byte 104 + .byte 37 + .byte 77 + .byte 118 + .byte 17 + .byte 54 + .byte 73 + .byte 106 + .byte 62 + .byte 156 + .byte 239 + .byte 142 + + .text +.global saturnin_setup_key + .type saturnin_setup_key, @function +saturnin_setup_key: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r18,X+ + ld r19,X+ + st Z,r18 + std Z+1,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+32,r18 + std Z+33,r19 + ld r18,X+ + ld r19,X+ + std Z+4,r18 + std Z+5,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+36,r18 + std Z+37,r19 + ld r18,X+ + ld r19,X+ + std Z+8,r18 + std Z+9,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+40,r18 + std Z+41,r19 + ld r18,X+ + ld r19,X+ + std Z+12,r18 + std Z+13,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+44,r18 + std Z+45,r19 + ld r18,X+ + ld r19,X+ + std Z+16,r18 + std Z+17,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+48,r18 + std Z+49,r19 + ld r18,X+ + ld r19,X+ + std Z+20,r18 + std Z+21,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+52,r18 + std Z+53,r19 + ld r18,X+ + ld r19,X+ + std Z+24,r18 + std Z+25,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+56,r18 + std Z+57,r19 + ld r18,X+ + ld r19,X+ + std Z+28,r18 + std Z+29,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+60,r18 + std Z+61,r19 + ld r18,X+ + ld r19,X+ + std Z+2,r18 + std Z+3,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc 
r18,r1 + std Z+34,r18 + std Z+35,r19 + ld r18,X+ + ld r19,X+ + std Z+6,r18 + std Z+7,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+38,r18 + std Z+39,r19 + ld r18,X+ + ld r19,X+ + std Z+10,r18 + std Z+11,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+42,r18 + std Z+43,r19 + ld r18,X+ + ld r19,X+ + std Z+14,r18 + std Z+15,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+46,r18 + std Z+47,r19 + ld r18,X+ + ld r19,X+ + std Z+18,r18 + std Z+19,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+50,r18 + std Z+51,r19 + ld r18,X+ + ld r19,X+ + std Z+22,r18 + std Z+23,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+54,r18 + std Z+55,r19 + ld r18,X+ + ld r19,X+ + std Z+26,r18 + std Z+27,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+58,r18 + std Z+59,r19 + ld r18,X+ + ld r19,X+ + std Z+30,r18 + std Z+31,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+62,r18 + std Z+63,r19 + ret + .size saturnin_setup_key, .-saturnin_setup_key + + .text +.global saturnin_encrypt_block + .type saturnin_encrypt_block, @function +saturnin_encrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ld r6,X+ + ld r7,X+ + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ld r10,X+ + ld r11,X+ + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ld r14,X+ + ld r15,X+ + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ld r8,X+ + ld r9,X+ + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ld r12,X+ + ld r13,X+ + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ld r24,X+ + ld r25,X+ + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor 
r21,r0 + std Y+31,r20 + std Y+32,r21 + ldi r16,5 + cpi r18,60 + cpc r19,r1 + brcs 120f + ldi r16,8 + ldi r17,4 + add r18,r17 + adc r19,r1 +120: + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1447f +126: + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 
+ eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + eor r20,r10 + eor r21,r11 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + eor r20,r12 + eor r21,r13 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + 
ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + 
std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + movw r20,r10 + movw r22,r12 + movw r10,r6 + movw r12,r8 + movw r6,r14 + movw r8,r24 + movw r14,r2 + movw r24,r4 + movw r2,r20 + movw r4,r22 + eor r2,r10 + eor r3,r11 + eor r4,r12 + eor r5,r13 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + eor r20,r2 + eor r21,r3 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + eor r20,r4 + eor r21,r5 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi 
r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld 
r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ret +1447: + rcall 126b + rcall 1453f + dec r16 + brne 1447b + rjmp 2622f +1453: + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + movw r20,r2 + movw r22,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r20 + movw r24,r22 + eor r14,r2 + eor r15,r3 + eor r24,r4 + eor r25,r5 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd 
r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + std Y+5,r22 + std Y+6,r23 + eor r20,r14 + eor r21,r15 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + std Y+7,r22 + std Y+8,r23 + eor r20,r24 + eor r21,r25 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 
+ adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + 
ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+9,r22 + std Y+10,r23 + eor r20,r10 + eor r21,r11 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+11,r22 + std Y+12,r23 + eor r20,r12 + eor r21,r13 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor r13,r0 + std Y+17,r14 + std Y+18,r15 + std Y+19,r24 + std Y+20,r25 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r2 + std Y+26,r3 + std Y+27,r4 + std Y+28,r5 + std Y+29,r10 + std Y+30,r11 + std Y+31,r12 + std Y+32,r13 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 + ret +2622: + ldd r26,Y+33 + ldd r27,Y+34 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_encrypt_block, .-saturnin_encrypt_block + + .text +.global saturnin_decrypt_block + .type saturnin_decrypt_block, @function +saturnin_decrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r6,X+ + ld r7,X+ + ld r10,X+ + ld r11,X+ + ld r14,X+ + ld r15,X+ + ld r20,X+ + ld r21,X+ + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + 
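+ ; As in the encrypt path, the 32-byte input block is unpacked with one bit-sliced half held in registers (r2-r15, r24, r25) and the other half parked in the stack frame at Y+17..Y+32.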
std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ld r8,X+ + ld r9,X+ + ld r12,X+ + ld r13,X+ + ld r24,X+ + ld r25,X+ + ld r20,X+ + ld r21,X+ + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + std Y+31,r20 + std Y+32,r21 + ldi r16,10 + cpi r18,60 + cpc r19,r1 + brcs 56f + ldi r16,16 + ldi r17,4 + add r18,r17 + adc r19,r1 +56: + add r18,r16 + adc r19,r1 + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1233f +64: + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r14,Y+17 + ldd r15,Y+18 + ldd r24,Y+19 + ldd r25,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r2,Y+25 + ldd r3,Y+26 + ldd r4,Y+27 + ldd r5,Y+28 + ldd r10,Y+29 + ldd r11,Y+30 + ldd r12,Y+31 + ldd r13,Y+32 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor r13,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc 
r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + eor r20,r10 + eor r21,r11 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + eor r20,r12 + eor r21,r13 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + 
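+ ; The ldd/eor/std read-modify-write pattern here XORs the register-resident half of the state into the half kept in the stack frame, i.e. the word-by-word x ^= y steps of the MDS mixing.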
std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor 
r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + eor r20,r14 + eor r21,r15 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + eor r20,r24 + eor r21,r25 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + movw r20,r14 + movw r22,r24 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r20 + movw r4,r22 + eor r2,r6 + eor r3,r7 + eor r4,r8 + eor r5,r9 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor 
r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + ret +1233: + rcall 64b + rcall 1239f + subi r16,2 + brne 1233b + rjmp 2560f +1239: + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm 
+ mov r20,r0 +#endif + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + 
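+ ; table_0 above appears to be the round-constant table kept in flash; the RAMPZ/elpm, lpm and plain ld variants cover the different AVR program-memory models. + ; The mask-51 (0x33) shift/and/or sequences above rotate every 4-bit nibble of a 16-bit lane by 2, and the mask-17/119 (0x11/0x77) sequences rotate nibbles by 1 or 3; these correspond to the leftRotate4_N slice rotations in internal-saturnin.c.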
ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+5 + ldd r21,Y+6 + eor r20,r2 + eor r21,r3 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + eor r20,r4 + eor r21,r5 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r2 + movw r22,r4 + movw r2,r14 + movw r4,r24 + movw r14,r6 + movw r24,r8 + movw r6,r10 + movw r8,r12 + movw r10,r20 + movw r12,r22 + eor r10,r6 + eor r11,r7 + eor r12,r8 + eor r13,r9 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 
+ or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor 
r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + eor r20,r10 + eor r21,r11 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + eor r20,r12 + eor r21,r13 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + 
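+ ; The mov/or/and/eor chains below evaluate the inverse Saturnin S-box in bit-sliced form, one logic operation over a 16-bit register pair per step (cf. saturnin_sbox_inverse in internal-saturnin.c).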
std Y+4,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ret +2560: + ldd r26,Y+33 + ldd r27,Y+34 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + ldd 
r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_decrypt_block, .-saturnin_decrypt_block + +#endif diff --git a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.c b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.c new file mode 100644 index 0000000..f4be50d --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.c @@ -0,0 +1,483 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "internal-saturnin.h" + +#if !defined(__AVR__) + +/* Round constants for various combinations of rounds and domain_sep */ +static uint32_t const saturnin_rc[] = { + /* RC_10_1 */ + 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, + 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d, + /* RC_10_2 */ + 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, + 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe, + /* RC_10_3 */ + 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, + 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4, + /* RC_10_4 */ + 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, + 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018, + /* RC_10_5 */ + 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, + 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752, + /* RC_10_6 */ + 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, + 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1, + /* RC_16_7 */ + 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, + 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, + 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, + 0x386d94d8, + /* RC_16_8 */ + 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, + 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, + 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, + 0x8eef9c3e +}; + +/* Loads a 32-bit word from the two halves of a 256-bit Saturnin input block */ +#define saturnin_load_word32(ptr) \ + ((((uint32_t)((ptr)[17])) << 24) | \ + (((uint32_t)((ptr)[16])) << 16) | \ + (((uint32_t)((ptr)[1])) << 8) | \ + ((uint32_t)((ptr)[0]))) + +/* Stores a 32-bit word to the two halves of a 256-bit Saturnin output block */ +#define saturnin_store_word32(ptr, x) \ + do { \ + (ptr)[0] = (uint8_t)(x); \ + (ptr)[1] = (uint8_t)((x) >> 8); \ + (ptr)[16] = (uint8_t)((x) >> 16); \ + (ptr)[17] = (uint8_t)((x) >> 24); \ + } while (0) + +/* Rotate the 4-bit nibbles within a 16-bit word left */ +#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ + do { \ + (a) = (((a) & (mask1)) << (bits1)) | \ + (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ + (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ + (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ + } while (0) + +/* Rotate 16-bit subwords left */ +#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ + do { \ + (a) = (((a) & (mask1)) << (bits1)) | \ + (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ + (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ + (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ + } while (0) + +/** + * \brief XOR the key into the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= ks->k[0]; \ + x1 ^= ks->k[1]; \ + x2 ^= ks->k[2]; \ + x3 ^= ks->k[3]; \ + x4 ^= ks->k[4]; \ + x5 ^= ks->k[5]; \ + x6 ^= ks->k[6]; \ + x7 ^= ks->k[7]; \ + } while (0) + +/** + * \brief XOR a rotated version of the key into the Saturnin state. 
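+ * + * The rotated words are ks->k[8..15], which saturnin_setup_key precomputes by + * rotating each 16-bit half of every key word left by 11 bits: + * k[8+i] = ((k[i] & 0x001F001FU) << 11) | ((k[i] >> 5) & 0x07FF07FFU).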
+ * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_xor_key_rotated(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= ks->k[8]; \ + x1 ^= ks->k[9]; \ + x2 ^= ks->k[10]; \ + x3 ^= ks->k[11]; \ + x4 ^= ks->k[12]; \ + x5 ^= ks->k[13]; \ + x6 ^= ks->k[14]; \ + x7 ^= ks->k[15]; \ + } while (0) + +/** + * \brief Applies the Saturnin S-box to a bit-sliced set of nibbles. + * + * \param a First bit-slice. + * \param b Second bit-slice. + * \param c Third bit-slice. + * \param d Fourth bit-slice. + * + * The S-box also involves a rotation on the output words. We perform the + * rotation implicitly in the higher layers. + */ +#define saturnin_sbox(a, b, c, d) \ + do { \ + (a) ^= (b) & (c); \ + (b) ^= (a) | (d); \ + (d) ^= (b) | (c); \ + (c) ^= (b) & (d); \ + (b) ^= (a) | (c); \ + (a) ^= (b) | (d); \ + } while (0) + +/** + * \brief Applies the inverse of the Saturnin S-box to a set of nibbles. + * + * \param a First bit-slice. + * \param b Second bit-slice. + * \param c Third bit-slice. + * \param d Fourth bit-slice. + * + * The inverse of the S-box also involves a rotation on the input words. + * We perform the rotation implicitly in the higher layers. + */ +#define saturnin_sbox_inverse(a, b, c, d) \ + do { \ + (a) ^= (b) | (d); \ + (b) ^= (a) | (c); \ + (c) ^= (b) & (d); \ + (d) ^= (b) | (c); \ + (b) ^= (a) | (d); \ + (a) ^= (b) & (c); \ + } while (0) + +/* Helpers for MDS matrix operations */ +#define SWAP(a) (((a) << 16) | ((a) >> 16)) +#define MUL(x0, x1, x2, x3) \ + do { \ + temp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = temp ^ x0; \ + } while (0) +#define MULINV(x0, x1, x2, x3) \ + do { \ + temp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ temp; \ + } while (0) + +/** + * \brief Applies the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_mds(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MUL(x4, x5, x6, x7); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + MUL(x0, x1, x2, x3); \ + MUL(x0, x1, x2, x3); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + } while (0) + +/** + * \brief Applies the inverse of the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. 
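+ * + * MULINV is the exact inverse of the MUL helper above: MUL maps (x0,x1,x2,x3) + * to (x1,x2,x3,x0^x1) and MULINV maps that back, so this macro undoes + * saturnin_mds by applying the inverse of each step in reverse order.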
+ */ +#define saturnin_mds_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MULINV(x0, x1, x2, x3); \ + MULINV(x0, x1, x2, x3); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + MULINV(x4, x5, x6, x7); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + } while (0) + +/** + * \brief Applies the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x5, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x6, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x7, 0x7777U, 1, 0x1111, 3); \ + } while (0) + +/** + * \brief Applies the inverse of the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x5, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x6, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x7, 0x1111U, 3, 0x7777, 1); \ + } while (0) + +/** + * \brief Applies the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x5, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x6, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x7, 0x0FFFU, 4, 0x000F, 12); \ + } while (0) + +/** + * \brief Applies the inverse of the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. 
+ * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x5, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x6, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x7, 0x000FU, 12, 0x0FFF, 4); \ + } while (0) + +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key) +{ + int index; + uint32_t temp; + for (index = 0; index < 16; index += 2) { + temp = saturnin_load_word32(key + index); + ks->k[index / 2] = temp; + ks->k[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | + ((temp >> 5) & 0x07FF07FFU); + } +} + +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 8 : 5; + const uint32_t *rc = saturnin_rc + domain; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Perform all encryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc += 2) { + /* Even rounds */ + saturnin_sbox(x0, x1, x2, x3); + saturnin_sbox(x4, x5, x6, x7); + saturnin_mds(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox(x1, x2, x3, x0); + saturnin_sbox(x7, x5, x4, x6); + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + + /* Odd rounds */ + saturnin_sbox(x2, x3, x0, x1); + saturnin_sbox(x6, x5, x7, x4); + saturnin_mds(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox(x3, x0, x1, x2); + saturnin_sbox(x4, x5, x6, x7); + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + + /* Correct the rotation of the second half before the next round */ + temp = x4; + x4 = x7; + x7 = x6; + x6 = temp; + } + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
8 : 5; + const uint32_t *rc = saturnin_rc + domain + (rounds - 1) * 2; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* Perform all decryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc -= 2) { + /* Correct the rotation of the second half before the next round */ + temp = x6; + x6 = x7; + x7 = x4; + x4 = temp; + + /* Odd rounds */ + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sbox_inverse(x3, x0, x1, x2); + saturnin_sbox_inverse(x4, x5, x6, x7); + saturnin_mds_inverse(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox_inverse(x2, x3, x0, x1); + saturnin_sbox_inverse(x6, x5, x7, x4); + + /* Even rounds */ + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_sbox_inverse(x1, x2, x3, x0); + saturnin_sbox_inverse(x7, x5, x4, x6); + saturnin_mds_inverse(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox_inverse(x0, x1, x2, x3); + saturnin_sbox_inverse(x4, x5, x6, x7); + } + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +#endif /* !__AVR__ */ diff --git a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.h b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.h new file mode 100644 index 0000000..8af07c3 --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LW_INTERNAL_SATURNIN_H +#define LW_INTERNAL_SATURNIN_H + +/** + * \file internal-saturnin.h + * \brief Saturnin block cipher. + * + * References: https://project.inria.fr/saturnin/ + */ + +#include "internal-util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Size of a Saturnin block in bytes. + */ +#define SATURNIN_BLOCK_SIZE 32 + +/** + * \brief Domain separator index 1 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_1 0 + +/** + * \brief Domain separator index 2 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_2 10 + +/** + * \brief Domain separator index 3 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_3 20 + +/** + * \brief Domain separator index 4 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_4 30 + +/** + * \brief Domain separator index 5 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_5 40 + +/** + * \brief Domain separator index 6 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_6 50 + +/** + * \brief Domain separator index 7 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_7 60 + +/** + * \brief Domain separator index 8 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_8 76 + +/** + * \brief Structure of the key schedule for Saturnin. + */ +typedef struct +{ + /** Pre-computed round keys for Saturnin */ + uint32_t k[16]; + +} saturnin_key_schedule_t; + +/** + * \brief Sets up a key schedule for Saturnin. + * + * \param ks Points to the key schedule to initialize. + * \param key Points to the 32 bytes of the key data. + */ +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key); + +/** + * \brief Encrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place encryption. + */ +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +/** + * \brief Decrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place decryption. 
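A note on the constants defined above: each SATURNIN_DOMAIN_* value doubles as the word offset at which that mode's round constants begin inside a single concatenated saturnin_rc table (defined in the accompanying internal-saturnin.c, which this diff does not include), and any domain at or above SATURNIN_DOMAIN_16_7 selects the 16-round variant. A minimal sketch of that convention, with the table size inferred from the offsets:

#include <stdint.h>
#include "internal-saturnin.h" /* for the SATURNIN_DOMAIN_* values above */

/* Sketch only: the real table is private to internal-saturnin.c.  Six
 * 10-round tables followed by two 16-round tables give 6*10 + 2*16 = 92
 * words, which is why SATURNIN_DOMAIN_16_8 = 60 + 16 = 76. */
extern const uint32_t saturnin_rc[92];

/* Mirrors the selection in saturnin_encrypt_block(): the 10-round domains
 * run 5 double-rounds, the 16-round domains run 8, and each double-round
 * consumes two constants. */
static const uint32_t *saturnin_select_rc(unsigned domain, unsigned *pairs)
{
    *pairs = (domain >= SATURNIN_DOMAIN_16_7) ? 8 : 5;
    return saturnin_rc + domain;
}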
+ */ +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/saturnin.c b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/saturnin.c index 734fc69..d2bd2cc 100644 --- a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/saturnin.c +++ b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/saturnin.c @@ -21,7 +21,7 @@ */ #include "saturnin.h" -#include "internal-util.h" +#include "internal-saturnin.h" #include aead_cipher_t const saturnin_cipher = { @@ -57,440 +57,22 @@ aead_hash_algorithm_t const saturnin_hash_algorithm = { 0 /* squeeze */ }; -/* Round constant tables for various combinations of rounds and domain_sep */ -static uint32_t const RC_10_1[] = { - 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, - 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d -}; -static uint32_t const RC_10_2[] = { - 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, - 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe -}; -static uint32_t const RC_10_3[] = { - 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, - 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4 -}; -static uint32_t const RC_10_4[] = { - 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, - 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018 -}; -static uint32_t const RC_10_5[] = { - 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, - 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752 -}; -static uint32_t const RC_10_6[] = { - 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, - 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1 -}; -static uint32_t const RC_16_7[] = { - 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, - 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, - 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, - 0x386d94d8 -}; -static uint32_t const RC_16_8[] = { - 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, - 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, - 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, - 0x8eef9c3e -}; - -/* Rotate the 4-bit nibbles within a 16-bit word left */ -#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ - } while (0) - -/* Rotate 16-bit subwords left */ -#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ - } while (0) - -/* XOR the SATURNIN state with the key */ -#define saturnin_xor_key() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index]; \ - } while (0) - -/* XOR the SATURNIN state with a rotated version of the key */ -#define saturnin_xor_key_rotated() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index + 8]; \ - } while (0) - -/* Apply an SBOX layer for SATURNIN - definition from the 
specification */ -#define S_LAYER(a, b, c, d) \ - do { \ - (a) ^= (b) & (c); \ - (b) ^= (a) | (d); \ - (d) ^= (b) | (c); \ - (c) ^= (b) & (d); \ - (b) ^= (a) | (c); \ - (a) ^= (b) | (d); \ - } while (0) - -/* Apply an SBOX layer for SATURNIN in reverse */ -#define S_LAYER_INVERSE(a, b, c, d) \ - do { \ - (a) ^= (b) | (d); \ - (b) ^= (a) | (c); \ - (c) ^= (b) & (d); \ - (d) ^= (b) | (c); \ - (b) ^= (a) | (d); \ - (a) ^= (b) & (c); \ - } while (0) - -/** - * \brief Applies the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - a = S[0]; b = S[1]; c = S[2]; d = S[3]; - S_LAYER(a, b, c, d); - S[0] = b; S[1] = c; S[2] = d; S[3] = a; - - /* PI_1 on the second half of the state */ - a = S[4]; b = S[5]; c = S[6]; d = S[7]; - S_LAYER(a, b, c, d); - S[4] = d; S[5] = b; S[6] = a; S[7] = c; -} - -/** - * \brief Applies the inverse of the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox_inverse(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - b = S[0]; c = S[1]; d = S[2]; a = S[3]; - S_LAYER_INVERSE(a, b, c, d); - S[0] = a; S[1] = b; S[2] = c; S[3] = d; - - /* PI_1 on the second half of the state */ - d = S[4]; b = S[5]; a = S[6]; c = S[7]; - S_LAYER_INVERSE(a, b, c, d); - S[4] = a; S[5] = b; S[6] = c; S[7] = d; -} - -/** - * \brief Applies the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the MDS matrix to the state */ - #define SWAP(a) (((a) << 16) | ((a) >> 16)) - #define MUL(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = tmp ^ x0; \ - } while (0) - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MUL(x4, x5, x6, x7, tmp); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - MUL(x0, x1, x2, x3, tmp); - MUL(x0, x1, x2, x3, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the inverse of the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds_inverse(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the inverse of the MDS matrix to the state */ - #define MULINV(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ tmp; \ - } while (0) - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MULINV(x0, x1, x2, x3, tmp); - MULINV(x0, x1, x2, x3, tmp); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - MULINV(x4, x5, x6, x7, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the slice permutation to the SATURNIN state. 
- * - * \param S The state. - */ -static void saturnin_slice(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[5], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[6], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[7], 0x7777U, 1, 0x1111, 3); -} - -/** - * \brief Applies the inverse of the slice permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_slice_inverse(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[5], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[6], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[7], 0x1111U, 3, 0x7777, 1); -} - -/** - * \brief Applies the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[5], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[6], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[7], 0x0FFFU, 4, 0x000F, 12); -} - -/** - * \brief Applies the inverse of the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet_inverse(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[5], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[6], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[7], 0x000FU, 12, 0x0FFF, 4); -} - -/** - * \brief Encrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Ciphertext output block, 32 bytes. - * \param input Plaintext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
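The slice and sheet steps removed here (and re-expressed as macros in internal-saturnin.c above) are both built from one primitive: an independent left rotation of each 4-bit nibble, or each 16-bit half-word, within a 32-bit lane. A small self-contained sketch of the nibble case, equivalent to what leftRotate4_N() does to one 16-bit half:

#include <stdint.h>

/* Rotate every 4-bit nibble of a 16-bit lane left by n (0 <= n <= 3).
 * mask keeps the low (4 - n) bits of each nibble, which shift up by n;
 * the remaining high bits of each nibble wrap around by (4 - n). */
static uint16_t rol_nibbles(uint16_t x, unsigned n)
{
    uint16_t mask = (uint16_t)(0x1111U * (0xFU >> n));
    return (uint16_t)(((x & mask) << n) | ((x & ~mask) >> (4 - n)));
}

With n = 0, 1, 2, 3 this reproduces the (0xFFFF, 0), (0x7777, 1), (0x3333, 2) and (0x1111, 3) mask/shift pairs used above. The inverse permutations simply swap each pair for its complement, since rotating by n and then by 4 - n is the identity; the rotations by 0 and 2 are their own inverses, which is why those pairs appear unchanged in saturnin_slice_inverse.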
- * - * \sa saturnin_block_decrypt() - */ -static void saturnin_block_encrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Perform all encryption rounds */ - for (; rounds > 0; rounds -= 2, RC += 2) { - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_slice(S); - saturnin_mds(S); - saturnin_slice_inverse(S); - S[0] ^= RC[0]; - saturnin_xor_key_rotated(); - - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_sheet(S); - saturnin_mds(S); - saturnin_sheet_inverse(S); - S[0] ^= RC[1]; - saturnin_xor_key(); - } - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - -/** - * \brief Decrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Plaintext output block, 32 bytes. - * \param input Ciphertext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
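The unpacking loop removed above also documents the byte layout behind the new saturnin_load_word32()/saturnin_store_word32() helpers, whose definitions live in internal-saturnin.c and are not part of this diff. Each 32-bit state word packs two 16-bit registers: bytes p[0..1], little-endian, in the low half, and bytes p[16..17] in the high half. A sketch consistent with the removed code:

#include <stdint.h>

/* Gather one state word: low 16 bits from p[0..1], high 16 bits from
 * p[16..17] -- the same bytes the removed unpacking loop combined
 * inline for S[index / 2]. */
static uint32_t load_word32(const unsigned char *p)
{
    return ((uint32_t)p[0]) | (((uint32_t)p[1]) << 8) |
           (((uint32_t)p[16]) << 16) | (((uint32_t)p[17]) << 24);
}

/* Scatter it back, mirroring the removed encode loop. */
static void store_word32(unsigned char *p, uint32_t x)
{
    p[0]  = (unsigned char)x;
    p[1]  = (unsigned char)(x >> 8);
    p[16] = (unsigned char)(x >> 16);
    p[17] = (unsigned char)(x >> 24);
}

This layout is why saturnin_encrypt_block() above loads from input, input + 2, ..., input + 14: the eight consecutive state words interleave the two 16-byte halves of the 32-byte block.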
- * - * \sa saturnin_block_encrypt() - */ -static void saturnin_block_decrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* Perform all decryption rounds */ - RC += rounds - 2; - for (; rounds > 0; rounds -= 2, RC -= 2) { - saturnin_xor_key(); - S[0] ^= RC[1]; - saturnin_sheet(S); - saturnin_mds_inverse(S); - saturnin_sheet_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - - saturnin_xor_key_rotated(); - S[0] ^= RC[0]; - saturnin_slice(S); - saturnin_mds_inverse(S); - saturnin_slice_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - /** * \brief Encrypts a 256-bit block with the SATURNIN block cipher and * then XOR's itself to generate a new key. * * \param block Block to be encrypted and then XOR'ed with itself. * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. + * \param domain Domain separator and round counter. */ -void saturnin_block_encrypt_xor - (const unsigned char *block, unsigned char *key, - unsigned rounds, const uint32_t *RC) +static void saturnin_block_encrypt_xor + (const unsigned char *block, unsigned char *key, unsigned domain) { - unsigned char temp[32]; - saturnin_block_encrypt(temp, block, key, rounds, RC); - lw_xor_block_2_src(key, block, temp, 32); + saturnin_key_schedule_t ks; + unsigned char *temp = (unsigned char *)ks.k; /* Reuse some stack space */ + saturnin_setup_key(&ks, key); + saturnin_encrypt_block(&ks, temp, block, domain); + lw_xor_block_2_src(key, block, temp, SATURNIN_BLOCK_SIZE); } /** @@ -499,20 +81,20 @@ void saturnin_block_encrypt_xor * \param c Output ciphertext buffer. * \param m Input plaintext buffer. * \param mlen Length of the plaintext in bytes. - * \param k Points to the 32-byte key. + * \param ks Points to the key schedule. * \param block Points to the pre-formatted nonce block. */ static void saturnin_ctr_encrypt (unsigned char *c, const unsigned char *m, unsigned long long mlen, - const unsigned char *k, unsigned char *block) + const saturnin_key_schedule_t *ks, unsigned char *block) { /* Note: Specification requires a 95-bit counter but we only use 32-bit. * This limits the maximum packet size to 128Gb. 
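(For scale: a 32-bit counter over 32-byte blocks caps a single packet at 2^32 * 32 = 2^37 bytes, i.e. exactly 128 GiB, so the "128Gb" figure above is gibibytes rather than gigabits.)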
That should be OK */ uint32_t counter = 1; - unsigned char out[32]; + unsigned char out[SATURNIN_BLOCK_SIZE]; while (mlen >= 32) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, 32); c += 32; m += 32; @@ -521,7 +103,7 @@ static void saturnin_ctr_encrypt } if (mlen > 0) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, (unsigned)mlen); } } @@ -533,18 +115,17 @@ static void saturnin_ctr_encrypt * \param block Temporary block of 32 bytes from the caller. * \param m Points to the message to be authenticated. * \param mlen Length of the message to be authenticated in bytes. - * \param rounds Number of rounds to perform. - * \param RC1 Round constants to use for domain separation on full blocks. - * \param RC2 Round constants to use for domain separation on the last block. + * \param domain1 Round count and domain separator for full blocks. + * \param domain2 Round count and domain separator for the last block. */ static void saturnin_authenticate (unsigned char *tag, unsigned char *block, const unsigned char *m, unsigned long long mlen, - unsigned rounds, const uint32_t *RC1, const uint32_t *RC2) + unsigned domain1, unsigned domain2) { unsigned temp; while (mlen >= 32) { - saturnin_block_encrypt_xor(m, tag, rounds, RC1); + saturnin_block_encrypt_xor(m, tag, domain1); m += 32; mlen -= 32; } @@ -552,7 +133,7 @@ static void saturnin_authenticate memcpy(block, m, temp); block[temp] = 0x80; memset(block + temp + 1, 0, 31 - temp); - saturnin_block_encrypt_xor(block, tag, rounds, RC2); + saturnin_block_encrypt_xor(block, tag, domain2); } int saturnin_aead_encrypt @@ -563,6 +144,7 @@ int saturnin_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char *tag; (void)nsec; @@ -576,17 +158,20 @@ int saturnin_aead_encrypt memset(block + 17, 0, 15); /* Encrypt the plaintext in counter mode to produce the ciphertext */ - saturnin_ctr_encrypt(c, m, mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(c, m, mlen, &ks, block); /* Set the counter back to zero and then encrypt the nonce */ tag = c + mlen; memcpy(tag, k, 32); memset(block + 17, 0, 15); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, c, mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); return 0; } @@ -598,6 +183,7 @@ int saturnin_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char tag[32]; (void)nsec; @@ -614,17 +200,20 @@ int saturnin_aead_decrypt /* Encrypt the nonce to initialize the authentication phase */ memcpy(tag, k, 32); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, 
c, *mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, *mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); /* Decrypt the ciphertext in counter mode to produce the plaintext */ memcpy(block, npub, 16); block[16] = 0x80; memset(block + 17, 0, 15); - saturnin_ctr_encrypt(m, c, *mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(m, c, *mlen, &ks, block); /* Check the authentication tag at the end of the message */ return aead_check_tag @@ -639,6 +228,7 @@ int saturnin_short_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned temp; (void)nsec; @@ -656,7 +246,8 @@ int saturnin_short_aead_encrypt memset(block + 17 + temp, 0, 15 - temp); /* Encrypt the input block to produce the output ciphertext */ - saturnin_block_encrypt(c, block, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_encrypt_block(&ks, c, block, SATURNIN_DOMAIN_10_6); *clen = 32; return 0; } @@ -669,6 +260,7 @@ int saturnin_short_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned check1, check2, len; int index, result; @@ -682,7 +274,8 @@ int saturnin_short_aead_decrypt return -1; /* Decrypt the ciphertext block */ - saturnin_block_decrypt(block, c, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_decrypt_block(&ks, block, c, SATURNIN_DOMAIN_10_6); /* Verify that the output block starts with the nonce and that it is * padded correctly. We need to do this very carefully to avoid leaking @@ -723,7 +316,8 @@ int saturnin_hash unsigned char tag[32]; unsigned char block[32]; memset(tag, 0, sizeof(tag)); - saturnin_authenticate(tag, block, in, inlen, 16, RC_16_7, RC_16_8); + saturnin_authenticate + (tag, block, in, inlen, SATURNIN_DOMAIN_16_7, SATURNIN_DOMAIN_16_8); memcpy(out, tag, 32); return 0; } @@ -752,12 +346,14 @@ void saturnin_hash_update state->s.count = 0; in += temp; inlen -= temp; - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_7); } /* Process full blocks that are aligned at state->s.count == 0 */ while (inlen >= 32) { - saturnin_block_encrypt_xor(in, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (in, state->s.hash, SATURNIN_DOMAIN_16_7); in += 32; inlen -= 32; } @@ -776,6 +372,7 @@ void saturnin_hash_finalize memset(state->s.block + state->s.count + 1, 0, 31 - state->s.count); /* Generate the final hash value */ - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_8); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_8); memcpy(out, state->s.hash, 32); } diff --git a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin-avr.S b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin-avr.S new file mode 100644 index 0000000..f20ce72 --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin-avr.S @@ -0,0 +1,6365 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 384 +table_0: + .byte 194 + .byte 38 + .byte 176 + .byte 78 + .byte 3 + .byte 83 + .byte 89 + .byte 144 + .byte 50 + .byte 230 + .byte 143 + .byte 170 + .byte 146 + .byte 138 + .byte 146 + .byte 
254 + .byte 25 + .byte 164 + .byte 21 + .byte 65 + .byte 50 + .byte 149 + .byte 83 + .byte 147 + .byte 78 + .byte 204 + .byte 177 + .byte 93 + .byte 202 + .byte 21 + .byte 21 + .byte 84 + .byte 168 + .byte 85 + .byte 31 + .byte 189 + .byte 13 + .byte 26 + .byte 110 + .byte 90 + .byte 181 + .byte 38 + .byte 69 + .byte 78 + .byte 240 + .byte 95 + .byte 86 + .byte 163 + .byte 216 + .byte 32 + .byte 143 + .byte 15 + .byte 225 + .byte 190 + .byte 84 + .byte 11 + .byte 157 + .byte 108 + .byte 26 + .byte 125 + .byte 10 + .byte 40 + .byte 166 + .byte 23 + .byte 134 + .byte 201 + .byte 70 + .byte 170 + .byte 98 + .byte 144 + .byte 25 + .byte 193 + .byte 222 + .byte 92 + .byte 44 + .byte 24 + .byte 254 + .byte 83 + .byte 13 + .byte 160 + .byte 152 + .byte 38 + .byte 22 + .byte 78 + .byte 161 + .byte 91 + .byte 83 + .byte 178 + .byte 101 + .byte 157 + .byte 143 + .byte 108 + .byte 48 + .byte 173 + .byte 22 + .byte 88 + .byte 250 + .byte 212 + .byte 31 + .byte 105 + .byte 249 + .byte 188 + .byte 245 + .byte 107 + .byte 37 + .byte 53 + .byte 235 + .byte 248 + .byte 250 + .byte 236 + .byte 29 + .byte 178 + .byte 23 + .byte 164 + .byte 61 + .byte 123 + .byte 180 + .byte 148 + .byte 44 + .byte 246 + .byte 91 + .byte 38 + .byte 175 + .byte 79 + .byte 22 + .byte 70 + .byte 72 + .byte 197 + .byte 33 + .byte 173 + .byte 220 + .byte 69 + .byte 7 + .byte 214 + .byte 139 + .byte 224 + .byte 184 + .byte 253 + .byte 4 + .byte 5 + .byte 87 + .byte 82 + .byte 31 + .byte 30 + .byte 22 + .byte 194 + .byte 251 + .byte 69 + .byte 31 + .byte 155 + .byte 82 + .byte 235 + .byte 50 + .byte 78 + .byte 25 + .byte 82 + .byte 24 + .byte 192 + .byte 152 + .byte 84 + .byte 118 + .byte 38 + .byte 252 + .byte 79 + .byte 71 + .byte 66 + .byte 77 + .byte 212 + .byte 156 + .byte 16 + .byte 220 + .byte 38 + .byte 214 + .byte 197 + .byte 201 + .byte 179 + .byte 223 + .byte 69 + .byte 1 + .byte 17 + .byte 164 + .byte 198 + .byte 76 + .byte 98 + .byte 181 + .byte 62 + .byte 86 + .byte 23 + .byte 135 + .byte 231 + .byte 86 + .byte 152 + .byte 251 + .byte 182 + .byte 8 + .byte 49 + .byte 82 + .byte 7 + .byte 185 + .byte 2 + .byte 1 + .byte 38 + .byte 9 + .byte 79 + .byte 180 + .byte 78 + .byte 66 + .byte 231 + .byte 118 + .byte 214 + .byte 220 + .byte 131 + .byte 165 + .byte 241 + .byte 15 + .byte 70 + .byte 91 + .byte 141 + .byte 14 + .byte 45 + .byte 156 + .byte 123 + .byte 185 + .byte 230 + .byte 125 + .byte 59 + .byte 161 + .byte 224 + .byte 47 + .byte 98 + .byte 90 + .byte 13 + .byte 141 + .byte 191 + .byte 59 + .byte 148 + .byte 161 + .byte 78 + .byte 218 + .byte 248 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 12 + .byte 24 + .byte 186 + .byte 63 + .byte 171 + .byte 185 + .byte 58 + .byte 86 + .byte 239 + .byte 165 + .byte 94 + .byte 18 + .byte 108 + .byte 162 + .byte 157 + .byte 133 + .byte 155 + .byte 119 + .byte 207 + .byte 184 + .byte 147 + .byte 231 + .byte 77 + .byte 125 + .byte 159 + .byte 180 + .byte 239 + .byte 7 + .byte 6 + .byte 83 + .byte 82 + .byte 141 + .byte 171 + .byte 230 + .byte 8 + .byte 30 + .byte 135 + .byte 159 + .byte 114 + .byte 65 + .byte 10 + .byte 239 + .byte 74 + .byte 140 + .byte 167 + .byte 201 + .byte 160 + .byte 74 + .byte 239 + .byte 149 + .byte 58 + .byte 217 + .byte 175 + .byte 210 + .byte 0 + .byte 187 + .byte 240 + .byte 91 + .byte 44 + .byte 182 + .byte 216 + .byte 148 + .byte 109 + .byte 56 + .byte 167 + .byte 25 + .byte 155 + .byte 60 + .byte 148 + .byte 134 + .byte 9 + 
.byte 169 + .byte 218 + .byte 120 + .byte 248 + .byte 35 + .byte 211 + .byte 71 + .byte 182 + .byte 167 + .byte 120 + .byte 157 + .byte 252 + .byte 116 + .byte 17 + .byte 174 + .byte 202 + .byte 234 + .byte 119 + .byte 166 + .byte 49 + .byte 47 + .byte 84 + .byte 192 + .byte 200 + .byte 76 + .byte 5 + .byte 202 + .byte 81 + .byte 47 + .byte 149 + .byte 241 + .byte 104 + .byte 82 + .byte 43 + .byte 138 + .byte 91 + .byte 79 + .byte 172 + .byte 180 + .byte 20 + .byte 246 + .byte 1 + .byte 84 + .byte 217 + .byte 241 + .byte 104 + .byte 37 + .byte 77 + .byte 118 + .byte 17 + .byte 54 + .byte 73 + .byte 106 + .byte 62 + .byte 156 + .byte 239 + .byte 142 + + .text +.global saturnin_setup_key + .type saturnin_setup_key, @function +saturnin_setup_key: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r18,X+ + ld r19,X+ + st Z,r18 + std Z+1,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+32,r18 + std Z+33,r19 + ld r18,X+ + ld r19,X+ + std Z+4,r18 + std Z+5,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+36,r18 + std Z+37,r19 + ld r18,X+ + ld r19,X+ + std Z+8,r18 + std Z+9,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+40,r18 + std Z+41,r19 + ld r18,X+ + ld r19,X+ + std Z+12,r18 + std Z+13,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+44,r18 + std Z+45,r19 + ld r18,X+ + ld r19,X+ + std Z+16,r18 + std Z+17,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+48,r18 + std Z+49,r19 + ld r18,X+ + ld r19,X+ + std Z+20,r18 + std Z+21,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+52,r18 + std Z+53,r19 + ld r18,X+ + ld r19,X+ + std Z+24,r18 + std Z+25,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+56,r18 + std Z+57,r19 + ld r18,X+ + ld r19,X+ + std Z+28,r18 + std Z+29,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+60,r18 + std Z+61,r19 + ld r18,X+ + ld r19,X+ + std Z+2,r18 + std Z+3,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+34,r18 + std Z+35,r19 + ld r18,X+ + ld r19,X+ + std Z+6,r18 + std Z+7,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+38,r18 + std Z+39,r19 + ld r18,X+ + ld r19,X+ + std Z+10,r18 + std Z+11,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+42,r18 + std Z+43,r19 + ld r18,X+ + ld r19,X+ + std Z+14,r18 + std Z+15,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+46,r18 + std Z+47,r19 + ld r18,X+ + ld r19,X+ + std Z+18,r18 + std Z+19,r19 + mov r0,r18 + mov r18,r19 + mov 
r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+50,r18 + std Z+51,r19 + ld r18,X+ + ld r19,X+ + std Z+22,r18 + std Z+23,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+54,r18 + std Z+55,r19 + ld r18,X+ + ld r19,X+ + std Z+26,r18 + std Z+27,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+58,r18 + std Z+59,r19 + ld r18,X+ + ld r19,X+ + std Z+30,r18 + std Z+31,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+62,r18 + std Z+63,r19 + ret + .size saturnin_setup_key, .-saturnin_setup_key + + .text +.global saturnin_encrypt_block + .type saturnin_encrypt_block, @function +saturnin_encrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ld r6,X+ + ld r7,X+ + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ld r10,X+ + ld r11,X+ + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ld r14,X+ + ld r15,X+ + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ld r8,X+ + ld r9,X+ + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ld r12,X+ + ld r13,X+ + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ld r24,X+ + ld r25,X+ + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + std Y+31,r20 + std Y+32,r21 + ldi r16,5 + cpi r18,60 + cpc r19,r1 + brcs 120f + ldi r16,8 + ldi r17,4 + add r18,r17 + adc r19,r1 +120: + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1447f +126: + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 
+ mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std 
Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + eor r20,r10 + eor r21,r11 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + eor r20,r12 + eor r21,r13 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + 
std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + movw r20,r10 + movw r22,r12 + movw r10,r6 + movw r12,r8 + movw r6,r14 + movw r8,r24 + movw r14,r2 + movw r24,r4 + movw r2,r20 + movw r4,r22 + eor r2,r10 + eor r3,r11 + eor r4,r12 + eor r5,r13 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd 
r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + eor r20,r2 + eor r21,r3 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + eor r20,r4 + eor r21,r5 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd 
r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ret +1447: + rcall 126b + rcall 1453f + dec r16 + brne 1447b + rjmp 2622f +1453: + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor 
r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + movw r20,r2 + movw r22,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r20 + movw r24,r22 + eor r14,r2 + eor r15,r3 + eor r24,r4 + eor r25,r5 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd 
r23,Y+2 + eor r20,r22 + eor r21,r23 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + std Y+5,r22 + std Y+6,r23 + eor r20,r14 + eor r21,r15 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + std Y+7,r22 + std Y+8,r23 + eor r20,r24 + eor r21,r25 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + 
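; (spill/reload: only 16 of the 32 state bytes fit in r2..r25 at once, so the two 128-bit halves are swapped through the Y stack frame here) +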
ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std 
Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+9,r22 + std Y+10,r23 + eor r20,r10 + eor r21,r11 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+11,r22 + std Y+12,r23 + eor r20,r12 + eor r21,r13 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor 
r13,r0 + std Y+17,r14 + std Y+18,r15 + std Y+19,r24 + std Y+20,r25 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r2 + std Y+26,r3 + std Y+27,r4 + std Y+28,r5 + std Y+29,r10 + std Y+30,r11 + std Y+31,r12 + std Y+32,r13 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 + ret +2622: + ldd r26,Y+33 + ldd r27,Y+34 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_encrypt_block, .-saturnin_encrypt_block + + .text +.global saturnin_decrypt_block + .type saturnin_decrypt_block, @function +saturnin_decrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r6,X+ + ld r7,X+ + ld r10,X+ + ld r11,X+ + ld r14,X+ + ld r15,X+ + ld r20,X+ + ld r21,X+ + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ld r8,X+ + ld r9,X+ + ld r12,X+ + ld r13,X+ + ld r24,X+ + ld r25,X+ + ld r20,X+ + ld r21,X+ + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + std Y+31,r20 + std Y+32,r21 + ldi r16,10 + cpi r18,60 + cpc r19,r1 + brcs 56f + ldi r16,16 + ldi r17,4 + add r18,r17 + adc r19,r1 +56: + add r18,r16 + adc r19,r1 + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1233f +64: + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov 
r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r14,Y+17 + ldd r15,Y+18 + ldd r24,Y+19 + ldd r25,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r2,Y+25 + ldd r3,Y+26 + ldd r4,Y+27 + ldd r5,Y+28 + ldd r10,Y+29 + ldd r11,Y+30 + ldd r12,Y+31 + ldd r13,Y+32 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor r13,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd 
r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + eor r20,r10 + eor r21,r11 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + eor r20,r12 + eor r21,r13 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or 
r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd 
r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + eor r20,r14 + eor r21,r15 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + eor r20,r24 + eor r21,r25 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + movw r20,r14 + movw r22,r24 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r20 + movw r4,r22 + eor r2,r6 + eor r3,r7 + eor r4,r8 + eor r5,r9 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + 
and r0,r25 + eor r13,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + ret +1233: + rcall 64b + rcall 1239f + subi r16,2 + brne 1233b + rjmp 2560f +1239: + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 
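+ ; the shift-and-mask sequence here (mask 51 = 0x33, shift by 2) rotates
+ ; every 4-bit nibble of the 16-bit lane, i.e. the bit-sliced slice
+ ; permutation; cf. leftRotate4_N(..., 0x3333, 2) in internal-saturnin.c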
+ and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+5 + ldd r21,Y+6 + eor r20,r2 + eor r21,r3 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + eor r20,r4 + eor r21,r5 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+3,r22 
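+ ; linear-layer word mixing: 16-bit state words are fetched from the
+ ; Y frame, XOR-combined and re-stored, since the frame-resident half
+ ; of the state cannot be mixed entirely in registers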
+ std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r2 + movw r22,r4 + movw r2,r14 + movw r4,r24 + movw r14,r6 + movw r24,r8 + movw r6,r10 + movw r8,r12 + movw r10,r20 + movw r12,r22 + eor r10,r6 + eor r11,r7 + eor r12,r8 + eor r13,r9 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + 
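; three-instruction groups (mov, and/or, eor) update one 16-bit slice
+ ; each: the bit-sliced S-box network, cf. saturnin_sbox() and
+ ; saturnin_sbox_inverse() in internal-saturnin.c +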
mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + 
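; crossed pairing: each half is XORed with the word loaded from the
+ ; opposite 16-bit offset, apparently the SWAP() half-swap inside the
+ ; MDS step (cf. internal-saturnin.c) +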
eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + eor r20,r10 + eor r21,r11 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + eor r20,r12 + eor r21,r13 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + 
and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ret +2560: + ldd r26,Y+33 + ldd r27,Y+34 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_decrypt_block, .-saturnin_decrypt_block + +#endif diff --git a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.c b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.c new file mode 100644 index 0000000..f4be50d --- /dev/null +++ 
b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.c @@ -0,0 +1,483 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "internal-saturnin.h" + +#if !defined(__AVR__) + +/* Round constants for various combinations of rounds and domain_sep */ +static uint32_t const saturnin_rc[] = { + /* RC_10_1 */ + 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, + 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d, + /* RC_10_2 */ + 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, + 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe, + /* RC_10_3 */ + 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, + 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4, + /* RC_10_4 */ + 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, + 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018, + /* RC_10_5 */ + 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, + 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752, + /* RC_10_6 */ + 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, + 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1, + /* RC_16_7 */ + 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, + 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, + 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, + 0x386d94d8, + /* RC_16_8 */ + 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, + 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, + 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, + 0x8eef9c3e +}; + +/* Loads a 32-bit word from the two halves of a 256-bit Saturnin input block */ +#define saturnin_load_word32(ptr) \ + ((((uint32_t)((ptr)[17])) << 24) | \ + (((uint32_t)((ptr)[16])) << 16) | \ + (((uint32_t)((ptr)[1])) << 8) | \ + ((uint32_t)((ptr)[0]))) + +/* Stores a 32-bit word to the two halves of a 256-bit Saturnin output block */ +#define saturnin_store_word32(ptr, x) \ + do { \ + (ptr)[0] = (uint8_t)(x); \ + (ptr)[1] = (uint8_t)((x) >> 8); \ + (ptr)[16] = (uint8_t)((x) >> 16); \ + (ptr)[17] = (uint8_t)((x) >> 24); \ + } while (0) + +/* Rotate the 4-bit nibbles within a 16-bit word left */ +#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ + do { \ + (a) = (((a) & (mask1)) << (bits1)) | \ + (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ + (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ + (((a) & 
(((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ + } while (0) + +/* Rotate 16-bit subwords left */ +#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ + do { \ + (a) = (((a) & (mask1)) << (bits1)) | \ + (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ + (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ + (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ + } while (0) + +/** + * \brief XOR the key into the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= ks->k[0]; \ + x1 ^= ks->k[1]; \ + x2 ^= ks->k[2]; \ + x3 ^= ks->k[3]; \ + x4 ^= ks->k[4]; \ + x5 ^= ks->k[5]; \ + x6 ^= ks->k[6]; \ + x7 ^= ks->k[7]; \ + } while (0) + +/** + * \brief XOR a rotated version of the key into the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_xor_key_rotated(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= ks->k[8]; \ + x1 ^= ks->k[9]; \ + x2 ^= ks->k[10]; \ + x3 ^= ks->k[11]; \ + x4 ^= ks->k[12]; \ + x5 ^= ks->k[13]; \ + x6 ^= ks->k[14]; \ + x7 ^= ks->k[15]; \ + } while (0) + +/** + * \brief Applies the Saturnin S-box to a bit-sliced set of nibbles. + * + * \param a First bit-slice. + * \param b Second bit-slice. + * \param c Third bit-slice. + * \param d Fourth bit-slice. + * + * The S-box also involves a rotation on the output words. We perform the + * rotation implicitly in the higher layers. + */ +#define saturnin_sbox(a, b, c, d) \ + do { \ + (a) ^= (b) & (c); \ + (b) ^= (a) | (d); \ + (d) ^= (b) | (c); \ + (c) ^= (b) & (d); \ + (b) ^= (a) | (c); \ + (a) ^= (b) | (d); \ + } while (0) + +/** + * \brief Applies the inverse of the Saturnin S-box to a set of nibbles. + * + * \param a First bit-slice. + * \param b Second bit-slice. + * \param c Third bit-slice. + * \param d Fourth bit-slice. + * + * The inverse of the S-box also involves a rotation on the input words. + * We perform the rotation implicitly in the higher layers. + */ +#define saturnin_sbox_inverse(a, b, c, d) \ + do { \ + (a) ^= (b) | (d); \ + (b) ^= (a) | (c); \ + (c) ^= (b) & (d); \ + (d) ^= (b) | (c); \ + (b) ^= (a) | (d); \ + (a) ^= (b) & (c); \ + } while (0) + +/* Helpers for MDS matrix operations */ +#define SWAP(a) (((a) << 16) | ((a) >> 16)) +#define MUL(x0, x1, x2, x3) \ + do { \ + temp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = temp ^ x0; \ + } while (0) +#define MULINV(x0, x1, x2, x3) \ + do { \ + temp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ temp; \ + } while (0) + +/** + * \brief Applies the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. 
+ * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_mds(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MUL(x4, x5, x6, x7); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + MUL(x0, x1, x2, x3); \ + MUL(x0, x1, x2, x3); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + } while (0) + +/** + * \brief Applies the inverse of the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_mds_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MULINV(x0, x1, x2, x3); \ + MULINV(x0, x1, x2, x3); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + MULINV(x4, x5, x6, x7); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + } while (0) + +/** + * \brief Applies the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x5, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x6, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x7, 0x7777U, 1, 0x1111, 3); \ + } while (0) + +/** + * \brief Applies the inverse of the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x5, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x6, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x7, 0x1111U, 3, 0x7777, 1); \ + } while (0) + +/** + * \brief Applies the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. 
+ * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x5, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x6, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x7, 0x0FFFU, 4, 0x000F, 12); \ + } while (0) + +/** + * \brief Applies the inverse of the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x5, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x6, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x7, 0x000FU, 12, 0x0FFF, 4); \ + } while (0) + +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key) +{ + int index; + uint32_t temp; + for (index = 0; index < 16; index += 2) { + temp = saturnin_load_word32(key + index); + ks->k[index / 2] = temp; + ks->k[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | + ((temp >> 5) & 0x07FF07FFU); + } +} + +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
8 : 5; + const uint32_t *rc = saturnin_rc + domain; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Perform all encryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc += 2) { + /* Even rounds */ + saturnin_sbox(x0, x1, x2, x3); + saturnin_sbox(x4, x5, x6, x7); + saturnin_mds(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox(x1, x2, x3, x0); + saturnin_sbox(x7, x5, x4, x6); + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + + /* Odd rounds */ + saturnin_sbox(x2, x3, x0, x1); + saturnin_sbox(x6, x5, x7, x4); + saturnin_mds(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox(x3, x0, x1, x2); + saturnin_sbox(x4, x5, x6, x7); + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + + /* Correct the rotation of the second half before the next round */ + temp = x4; + x4 = x7; + x7 = x6; + x6 = temp; + } + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
8 : 5; + const uint32_t *rc = saturnin_rc + domain + (rounds - 1) * 2; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* Perform all decryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc -= 2) { + /* Correct the rotation of the second half before the next round */ + temp = x6; + x6 = x7; + x7 = x4; + x4 = temp; + + /* Odd rounds */ + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sbox_inverse(x3, x0, x1, x2); + saturnin_sbox_inverse(x4, x5, x6, x7); + saturnin_mds_inverse(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox_inverse(x2, x3, x0, x1); + saturnin_sbox_inverse(x6, x5, x7, x4); + + /* Even rounds */ + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_sbox_inverse(x1, x2, x3, x0); + saturnin_sbox_inverse(x7, x5, x4, x6); + saturnin_mds_inverse(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox_inverse(x0, x1, x2, x3); + saturnin_sbox_inverse(x4, x5, x6, x7); + } + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +#endif /* !__AVR__ */ diff --git a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.h b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.h new file mode 100644 index 0000000..8af07c3 --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LW_INTERNAL_SATURNIN_H +#define LW_INTERNAL_SATURNIN_H + +/** + * \file internal-saturnin.h + * \brief Saturnin block cipher. + * + * References: https://project.inria.fr/saturnin/ + */ + +#include "internal-util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Size of a Saturnin block in bytes. + */ +#define SATURNIN_BLOCK_SIZE 32 + +/** + * \brief Domain separator index 1 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_1 0 + +/** + * \brief Domain separator index 2 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_2 10 + +/** + * \brief Domain separator index 3 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_3 20 + +/** + * \brief Domain separator index 4 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_4 30 + +/** + * \brief Domain separator index 5 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_5 40 + +/** + * \brief Domain separator index 6 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_6 50 + +/** + * \brief Domain separator index 7 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_7 60 + +/** + * \brief Domain separator index 8 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_8 76 + +/** + * \brief Structure of the key schedule for Saturnin. + */ +typedef struct +{ + /** Pre-computed round keys for Saturnin */ + uint32_t k[16]; + +} saturnin_key_schedule_t; + +/** + * \brief Sets up a key schedule for Saturnin. + * + * \param ks Points to the key schedule to initialize. + * \param key Points to the 32 bytes of the key data. + */ +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key); + +/** + * \brief Encrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place encryption. + */ +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +/** + * \brief Decrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place decryption. 
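+ * + * A minimal round-trip sketch (illustrative only, not part of this header; \c key and \c buf are hypothetical 32-byte caller buffers): + * \code + * saturnin_key_schedule_t ks; + * saturnin_setup_key(&ks, key); + * saturnin_encrypt_block(&ks, buf, buf, SATURNIN_DOMAIN_10_1); + * saturnin_decrypt_block(&ks, buf, buf, SATURNIN_DOMAIN_10_1); + * \endcode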
+ */ +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/saturnin.c b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/saturnin.c index 734fc69..d2bd2cc 100644 --- a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/saturnin.c +++ b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/saturnin.c @@ -21,7 +21,7 @@ */ #include "saturnin.h" -#include "internal-util.h" +#include "internal-saturnin.h" #include aead_cipher_t const saturnin_cipher = { @@ -57,440 +57,22 @@ aead_hash_algorithm_t const saturnin_hash_algorithm = { 0 /* squeeze */ }; -/* Round constant tables for various combinations of rounds and domain_sep */ -static uint32_t const RC_10_1[] = { - 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, - 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d -}; -static uint32_t const RC_10_2[] = { - 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, - 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe -}; -static uint32_t const RC_10_3[] = { - 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, - 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4 -}; -static uint32_t const RC_10_4[] = { - 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, - 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018 -}; -static uint32_t const RC_10_5[] = { - 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, - 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752 -}; -static uint32_t const RC_10_6[] = { - 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, - 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1 -}; -static uint32_t const RC_16_7[] = { - 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, - 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, - 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, - 0x386d94d8 -}; -static uint32_t const RC_16_8[] = { - 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, - 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, - 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, - 0x8eef9c3e -}; - -/* Rotate the 4-bit nibbles within a 16-bit word left */ -#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ - } while (0) - -/* Rotate 16-bit subwords left */ -#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ - } while (0) - -/* XOR the SATURNIN state with the key */ -#define saturnin_xor_key() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index]; \ - } while (0) - -/* XOR the SATURNIN state with a rotated version of the key */ -#define saturnin_xor_key_rotated() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index + 8]; \ - } while (0) - -/* Apply an SBOX layer for SATURNIN - definition from the specification */ -#define 
S_LAYER(a, b, c, d) \ - do { \ - (a) ^= (b) & (c); \ - (b) ^= (a) | (d); \ - (d) ^= (b) | (c); \ - (c) ^= (b) & (d); \ - (b) ^= (a) | (c); \ - (a) ^= (b) | (d); \ - } while (0) - -/* Apply an SBOX layer for SATURNIN in reverse */ -#define S_LAYER_INVERSE(a, b, c, d) \ - do { \ - (a) ^= (b) | (d); \ - (b) ^= (a) | (c); \ - (c) ^= (b) & (d); \ - (d) ^= (b) | (c); \ - (b) ^= (a) | (d); \ - (a) ^= (b) & (c); \ - } while (0) - -/** - * \brief Applies the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - a = S[0]; b = S[1]; c = S[2]; d = S[3]; - S_LAYER(a, b, c, d); - S[0] = b; S[1] = c; S[2] = d; S[3] = a; - - /* PI_1 on the second half of the state */ - a = S[4]; b = S[5]; c = S[6]; d = S[7]; - S_LAYER(a, b, c, d); - S[4] = d; S[5] = b; S[6] = a; S[7] = c; -} - -/** - * \brief Applies the inverse of the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox_inverse(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - b = S[0]; c = S[1]; d = S[2]; a = S[3]; - S_LAYER_INVERSE(a, b, c, d); - S[0] = a; S[1] = b; S[2] = c; S[3] = d; - - /* PI_1 on the second half of the state */ - d = S[4]; b = S[5]; a = S[6]; c = S[7]; - S_LAYER_INVERSE(a, b, c, d); - S[4] = a; S[5] = b; S[6] = c; S[7] = d; -} - -/** - * \brief Applies the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the MDS matrix to the state */ - #define SWAP(a) (((a) << 16) | ((a) >> 16)) - #define MUL(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = tmp ^ x0; \ - } while (0) - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MUL(x4, x5, x6, x7, tmp); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - MUL(x0, x1, x2, x3, tmp); - MUL(x0, x1, x2, x3, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the inverse of the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds_inverse(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the inverse of the MDS matrix to the state */ - #define MULINV(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ tmp; \ - } while (0) - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MULINV(x0, x1, x2, x3, tmp); - MULINV(x0, x1, x2, x3, tmp); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - MULINV(x4, x5, x6, x7, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the slice permutation to the SATURNIN state. - * - * \param S The state. 
- */ -static void saturnin_slice(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[5], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[6], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[7], 0x7777U, 1, 0x1111, 3); -} - -/** - * \brief Applies the inverse of the slice permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_slice_inverse(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[5], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[6], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[7], 0x1111U, 3, 0x7777, 1); -} - -/** - * \brief Applies the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[5], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[6], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[7], 0x0FFFU, 4, 0x000F, 12); -} - -/** - * \brief Applies the inverse of the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet_inverse(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[5], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[6], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[7], 0x000FU, 12, 0x0FFF, 4); -} - -/** - * \brief Encrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Ciphertext output block, 32 bytes. - * \param input Plaintext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_decrypt() - */ -static void saturnin_block_encrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Perform all encryption rounds */ - for (; rounds > 0; rounds -= 2, RC += 2) { - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_slice(S); - saturnin_mds(S); - saturnin_slice_inverse(S); - S[0] ^= RC[0]; - saturnin_xor_key_rotated(); - - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_sheet(S); - saturnin_mds(S); - saturnin_sheet_inverse(S); - S[0] ^= RC[1]; - saturnin_xor_key(); - } - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - -/** - * \brief Decrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Plaintext output block, 32 bytes. - * \param input Ciphertext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_encrypt() - */ -static void saturnin_block_decrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* Perform all decryption rounds */ - RC += rounds - 2; - for (; rounds > 0; rounds -= 2, RC -= 2) { - saturnin_xor_key(); - S[0] ^= RC[1]; - saturnin_sheet(S); - saturnin_mds_inverse(S); - saturnin_sheet_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - - saturnin_xor_key_rotated(); - S[0] ^= RC[0]; - saturnin_slice(S); - saturnin_mds_inverse(S); - saturnin_slice_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - /** * \brief Encrypts a 256-bit block with the SATURNIN block cipher and * then XOR's itself to generate a new key. * * \param block Block to be encrypted and then XOR'ed with itself. * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. + * \param domain Domain separator and round counter. */ -void saturnin_block_encrypt_xor - (const unsigned char *block, unsigned char *key, - unsigned rounds, const uint32_t *RC) +static void saturnin_block_encrypt_xor + (const unsigned char *block, unsigned char *key, unsigned domain) { - unsigned char temp[32]; - saturnin_block_encrypt(temp, block, key, rounds, RC); - lw_xor_block_2_src(key, block, temp, 32); + saturnin_key_schedule_t ks; + unsigned char *temp = (unsigned char *)ks.k; /* Reuse some stack space */ + saturnin_setup_key(&ks, key); + saturnin_encrypt_block(&ks, temp, block, domain); + lw_xor_block_2_src(key, block, temp, SATURNIN_BLOCK_SIZE); } /** @@ -499,20 +81,20 @@ void saturnin_block_encrypt_xor * \param c Output ciphertext buffer. * \param m Input plaintext buffer. * \param mlen Length of the plaintext in bytes. - * \param k Points to the 32-byte key. + * \param ks Points to the key schedule. * \param block Points to the pre-formatted nonce block. */ static void saturnin_ctr_encrypt (unsigned char *c, const unsigned char *m, unsigned long long mlen, - const unsigned char *k, unsigned char *block) + const saturnin_key_schedule_t *ks, unsigned char *block) { /* Note: Specification requires a 95-bit counter but we only use 32-bit. * This limits the maximum packet size to 128Gb. 
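(2^32 counter values at 32 bytes per block is 2^37 bytes, i.e. 128 GiB.)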
That should be OK */ uint32_t counter = 1; - unsigned char out[32]; + unsigned char out[SATURNIN_BLOCK_SIZE]; while (mlen >= 32) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, 32); c += 32; m += 32; @@ -521,7 +103,7 @@ static void saturnin_ctr_encrypt } if (mlen > 0) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, (unsigned)mlen); } } @@ -533,18 +115,17 @@ static void saturnin_ctr_encrypt * \param block Temporary block of 32 bytes from the caller. * \param m Points to the message to be authenticated. * \param mlen Length of the message to be authenticated in bytes. - * \param rounds Number of rounds to perform. - * \param RC1 Round constants to use for domain separation on full blocks. - * \param RC2 Round constants to use for domain separation on the last block. + * \param domain1 Round count and domain separator for full blocks. + * \param domain2 Round count and domain separator for the last block. */ static void saturnin_authenticate (unsigned char *tag, unsigned char *block, const unsigned char *m, unsigned long long mlen, - unsigned rounds, const uint32_t *RC1, const uint32_t *RC2) + unsigned domain1, unsigned domain2) { unsigned temp; while (mlen >= 32) { - saturnin_block_encrypt_xor(m, tag, rounds, RC1); + saturnin_block_encrypt_xor(m, tag, domain1); m += 32; mlen -= 32; } @@ -552,7 +133,7 @@ static void saturnin_authenticate memcpy(block, m, temp); block[temp] = 0x80; memset(block + temp + 1, 0, 31 - temp); - saturnin_block_encrypt_xor(block, tag, rounds, RC2); + saturnin_block_encrypt_xor(block, tag, domain2); } int saturnin_aead_encrypt @@ -563,6 +144,7 @@ int saturnin_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char *tag; (void)nsec; @@ -576,17 +158,20 @@ int saturnin_aead_encrypt memset(block + 17, 0, 15); /* Encrypt the plaintext in counter mode to produce the ciphertext */ - saturnin_ctr_encrypt(c, m, mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(c, m, mlen, &ks, block); /* Set the counter back to zero and then encrypt the nonce */ tag = c + mlen; memcpy(tag, k, 32); memset(block + 17, 0, 15); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, c, mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); return 0; } @@ -598,6 +183,7 @@ int saturnin_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char tag[32]; (void)nsec; @@ -614,17 +200,20 @@ int saturnin_aead_decrypt /* Encrypt the nonce to initialize the authentication phase */ memcpy(tag, k, 32); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, 
c, *mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, *mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); /* Decrypt the ciphertext in counter mode to produce the plaintext */ memcpy(block, npub, 16); block[16] = 0x80; memset(block + 17, 0, 15); - saturnin_ctr_encrypt(m, c, *mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(m, c, *mlen, &ks, block); /* Check the authentication tag at the end of the message */ return aead_check_tag @@ -639,6 +228,7 @@ int saturnin_short_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned temp; (void)nsec; @@ -656,7 +246,8 @@ int saturnin_short_aead_encrypt memset(block + 17 + temp, 0, 15 - temp); /* Encrypt the input block to produce the output ciphertext */ - saturnin_block_encrypt(c, block, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_encrypt_block(&ks, c, block, SATURNIN_DOMAIN_10_6); *clen = 32; return 0; } @@ -669,6 +260,7 @@ int saturnin_short_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned check1, check2, len; int index, result; @@ -682,7 +274,8 @@ int saturnin_short_aead_decrypt return -1; /* Decrypt the ciphertext block */ - saturnin_block_decrypt(block, c, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_decrypt_block(&ks, block, c, SATURNIN_DOMAIN_10_6); /* Verify that the output block starts with the nonce and that it is * padded correctly. We need to do this very carefully to avoid leaking @@ -723,7 +316,8 @@ int saturnin_hash unsigned char tag[32]; unsigned char block[32]; memset(tag, 0, sizeof(tag)); - saturnin_authenticate(tag, block, in, inlen, 16, RC_16_7, RC_16_8); + saturnin_authenticate + (tag, block, in, inlen, SATURNIN_DOMAIN_16_7, SATURNIN_DOMAIN_16_8); memcpy(out, tag, 32); return 0; } @@ -752,12 +346,14 @@ void saturnin_hash_update state->s.count = 0; in += temp; inlen -= temp; - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_7); } /* Process full blocks that are aligned at state->s.count == 0 */ while (inlen >= 32) { - saturnin_block_encrypt_xor(in, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (in, state->s.hash, SATURNIN_DOMAIN_16_7); in += 32; inlen -= 32; } @@ -776,6 +372,7 @@ void saturnin_hash_finalize memset(state->s.block + state->s.count + 1, 0, 31 - state->s.count); /* Generate the final hash value */ - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_8); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_8); memcpy(out, state->s.hash, 32); } diff --git a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin-avr.S b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin-avr.S new file mode 100644 index 0000000..f20ce72 --- /dev/null +++ b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin-avr.S @@ -0,0 +1,6365 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 384 +table_0: + .byte 194 + .byte 38 + .byte 176 + .byte 78 + .byte 3 + .byte 83 + .byte 89 + .byte 144 + .byte 50 + .byte 230 + .byte 143 + .byte 170 + .byte 146 + .byte 138 + .byte 146 + .byte 254 
+ .byte 25 + .byte 164 + .byte 21 + .byte 65 + .byte 50 + .byte 149 + .byte 83 + .byte 147 + .byte 78 + .byte 204 + .byte 177 + .byte 93 + .byte 202 + .byte 21 + .byte 21 + .byte 84 + .byte 168 + .byte 85 + .byte 31 + .byte 189 + .byte 13 + .byte 26 + .byte 110 + .byte 90 + .byte 181 + .byte 38 + .byte 69 + .byte 78 + .byte 240 + .byte 95 + .byte 86 + .byte 163 + .byte 216 + .byte 32 + .byte 143 + .byte 15 + .byte 225 + .byte 190 + .byte 84 + .byte 11 + .byte 157 + .byte 108 + .byte 26 + .byte 125 + .byte 10 + .byte 40 + .byte 166 + .byte 23 + .byte 134 + .byte 201 + .byte 70 + .byte 170 + .byte 98 + .byte 144 + .byte 25 + .byte 193 + .byte 222 + .byte 92 + .byte 44 + .byte 24 + .byte 254 + .byte 83 + .byte 13 + .byte 160 + .byte 152 + .byte 38 + .byte 22 + .byte 78 + .byte 161 + .byte 91 + .byte 83 + .byte 178 + .byte 101 + .byte 157 + .byte 143 + .byte 108 + .byte 48 + .byte 173 + .byte 22 + .byte 88 + .byte 250 + .byte 212 + .byte 31 + .byte 105 + .byte 249 + .byte 188 + .byte 245 + .byte 107 + .byte 37 + .byte 53 + .byte 235 + .byte 248 + .byte 250 + .byte 236 + .byte 29 + .byte 178 + .byte 23 + .byte 164 + .byte 61 + .byte 123 + .byte 180 + .byte 148 + .byte 44 + .byte 246 + .byte 91 + .byte 38 + .byte 175 + .byte 79 + .byte 22 + .byte 70 + .byte 72 + .byte 197 + .byte 33 + .byte 173 + .byte 220 + .byte 69 + .byte 7 + .byte 214 + .byte 139 + .byte 224 + .byte 184 + .byte 253 + .byte 4 + .byte 5 + .byte 87 + .byte 82 + .byte 31 + .byte 30 + .byte 22 + .byte 194 + .byte 251 + .byte 69 + .byte 31 + .byte 155 + .byte 82 + .byte 235 + .byte 50 + .byte 78 + .byte 25 + .byte 82 + .byte 24 + .byte 192 + .byte 152 + .byte 84 + .byte 118 + .byte 38 + .byte 252 + .byte 79 + .byte 71 + .byte 66 + .byte 77 + .byte 212 + .byte 156 + .byte 16 + .byte 220 + .byte 38 + .byte 214 + .byte 197 + .byte 201 + .byte 179 + .byte 223 + .byte 69 + .byte 1 + .byte 17 + .byte 164 + .byte 198 + .byte 76 + .byte 98 + .byte 181 + .byte 62 + .byte 86 + .byte 23 + .byte 135 + .byte 231 + .byte 86 + .byte 152 + .byte 251 + .byte 182 + .byte 8 + .byte 49 + .byte 82 + .byte 7 + .byte 185 + .byte 2 + .byte 1 + .byte 38 + .byte 9 + .byte 79 + .byte 180 + .byte 78 + .byte 66 + .byte 231 + .byte 118 + .byte 214 + .byte 220 + .byte 131 + .byte 165 + .byte 241 + .byte 15 + .byte 70 + .byte 91 + .byte 141 + .byte 14 + .byte 45 + .byte 156 + .byte 123 + .byte 185 + .byte 230 + .byte 125 + .byte 59 + .byte 161 + .byte 224 + .byte 47 + .byte 98 + .byte 90 + .byte 13 + .byte 141 + .byte 191 + .byte 59 + .byte 148 + .byte 161 + .byte 78 + .byte 218 + .byte 248 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 12 + .byte 24 + .byte 186 + .byte 63 + .byte 171 + .byte 185 + .byte 58 + .byte 86 + .byte 239 + .byte 165 + .byte 94 + .byte 18 + .byte 108 + .byte 162 + .byte 157 + .byte 133 + .byte 155 + .byte 119 + .byte 207 + .byte 184 + .byte 147 + .byte 231 + .byte 77 + .byte 125 + .byte 159 + .byte 180 + .byte 239 + .byte 7 + .byte 6 + .byte 83 + .byte 82 + .byte 141 + .byte 171 + .byte 230 + .byte 8 + .byte 30 + .byte 135 + .byte 159 + .byte 114 + .byte 65 + .byte 10 + .byte 239 + .byte 74 + .byte 140 + .byte 167 + .byte 201 + .byte 160 + .byte 74 + .byte 239 + .byte 149 + .byte 58 + .byte 217 + .byte 175 + .byte 210 + .byte 0 + .byte 187 + .byte 240 + .byte 91 + .byte 44 + .byte 182 + .byte 216 + .byte 148 + .byte 109 + .byte 56 + .byte 167 + .byte 25 + .byte 155 + .byte 60 + .byte 148 + .byte 134 + .byte 9 + .byte 
169 + .byte 218 + .byte 120 + .byte 248 + .byte 35 + .byte 211 + .byte 71 + .byte 182 + .byte 167 + .byte 120 + .byte 157 + .byte 252 + .byte 116 + .byte 17 + .byte 174 + .byte 202 + .byte 234 + .byte 119 + .byte 166 + .byte 49 + .byte 47 + .byte 84 + .byte 192 + .byte 200 + .byte 76 + .byte 5 + .byte 202 + .byte 81 + .byte 47 + .byte 149 + .byte 241 + .byte 104 + .byte 82 + .byte 43 + .byte 138 + .byte 91 + .byte 79 + .byte 172 + .byte 180 + .byte 20 + .byte 246 + .byte 1 + .byte 84 + .byte 217 + .byte 241 + .byte 104 + .byte 37 + .byte 77 + .byte 118 + .byte 17 + .byte 54 + .byte 73 + .byte 106 + .byte 62 + .byte 156 + .byte 239 + .byte 142 + + .text +.global saturnin_setup_key + .type saturnin_setup_key, @function +saturnin_setup_key: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r18,X+ + ld r19,X+ + st Z,r18 + std Z+1,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+32,r18 + std Z+33,r19 + ld r18,X+ + ld r19,X+ + std Z+4,r18 + std Z+5,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+36,r18 + std Z+37,r19 + ld r18,X+ + ld r19,X+ + std Z+8,r18 + std Z+9,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+40,r18 + std Z+41,r19 + ld r18,X+ + ld r19,X+ + std Z+12,r18 + std Z+13,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+44,r18 + std Z+45,r19 + ld r18,X+ + ld r19,X+ + std Z+16,r18 + std Z+17,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+48,r18 + std Z+49,r19 + ld r18,X+ + ld r19,X+ + std Z+20,r18 + std Z+21,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+52,r18 + std Z+53,r19 + ld r18,X+ + ld r19,X+ + std Z+24,r18 + std Z+25,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+56,r18 + std Z+57,r19 + ld r18,X+ + ld r19,X+ + std Z+28,r18 + std Z+29,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+60,r18 + std Z+61,r19 + ld r18,X+ + ld r19,X+ + std Z+2,r18 + std Z+3,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+34,r18 + std Z+35,r19 + ld r18,X+ + ld r19,X+ + std Z+6,r18 + std Z+7,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+38,r18 + std Z+39,r19 + ld r18,X+ + ld r19,X+ + std Z+10,r18 + std Z+11,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+42,r18 + std Z+43,r19 + ld r18,X+ + ld r19,X+ + std Z+14,r18 + std Z+15,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+46,r18 + std Z+47,r19 + ld r18,X+ + ld r19,X+ + std Z+18,r18 + std Z+19,r19 + mov r0,r18 + mov r18,r19 + mov 
r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+50,r18 + std Z+51,r19 + ld r18,X+ + ld r19,X+ + std Z+22,r18 + std Z+23,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+54,r18 + std Z+55,r19 + ld r18,X+ + ld r19,X+ + std Z+26,r18 + std Z+27,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+58,r18 + std Z+59,r19 + ld r18,X+ + ld r19,X+ + std Z+30,r18 + std Z+31,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+62,r18 + std Z+63,r19 + ret + .size saturnin_setup_key, .-saturnin_setup_key + + .text +.global saturnin_encrypt_block + .type saturnin_encrypt_block, @function +saturnin_encrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ld r6,X+ + ld r7,X+ + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ld r10,X+ + ld r11,X+ + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ld r14,X+ + ld r15,X+ + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ld r8,X+ + ld r9,X+ + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ld r12,X+ + ld r13,X+ + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ld r24,X+ + ld r25,X+ + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + std Y+31,r20 + std Y+32,r21 + ldi r16,5 + cpi r18,60 + cpc r19,r1 + brcs 120f + ldi r16,8 + ldi r17,4 + add r18,r17 + adc r19,r1 +120: + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1447f +126: + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 
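+ ; last two stages of the bit-sliced S-box: b ^= a | c, then a ^= b | d (here a = r2-r5, b = r6-r9, c = r10-r13, d = r14,r15,r24,r25)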
+ mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std 
Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + eor r20,r10 + eor r21,r11 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + eor r20,r12 + eor r21,r13 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + 
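+ ; write the second half of the bit-sliced state back to the stack frame (Y+17..Y+32), then reload the first half from Y+1..Y+16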
std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + movw r20,r10 + movw r22,r12 + movw r10,r6 + movw r12,r8 + movw r6,r14 + movw r8,r24 + movw r14,r2 + movw r24,r4 + movw r2,r20 + movw r4,r22 + eor r2,r10 + eor r3,r11 + eor r4,r12 + eor r5,r13 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd 
r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + eor r20,r2 + eor r21,r3 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + eor r20,r4 + eor r21,r5 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd 
r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ret +1447: + rcall 126b + rcall 1453f + dec r16 + brne 1447b + rjmp 2622f +1453: + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor 
r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + movw r20,r2 + movw r22,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r20 + movw r24,r22 + eor r14,r2 + eor r15,r3 + eor r24,r4 + eor r25,r5 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd 
r23,Y+2 + eor r20,r22 + eor r21,r23 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + std Y+5,r22 + std Y+6,r23 + eor r20,r14 + eor r21,r15 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + std Y+7,r22 + std Y+8,r23 + eor r20,r24 + eor r21,r25 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + 
ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std 
Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+9,r22 + std Y+10,r23 + eor r20,r10 + eor r21,r11 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+11,r22 + std Y+12,r23 + eor r20,r12 + eor r21,r13 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor 
r13,r0 + std Y+17,r14 + std Y+18,r15 + std Y+19,r24 + std Y+20,r25 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r2 + std Y+26,r3 + std Y+27,r4 + std Y+28,r5 + std Y+29,r10 + std Y+30,r11 + std Y+31,r12 + std Y+32,r13 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 + ret +2622: + ldd r26,Y+33 + ldd r27,Y+34 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_encrypt_block, .-saturnin_encrypt_block + + .text +.global saturnin_decrypt_block + .type saturnin_decrypt_block, @function +saturnin_decrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r6,X+ + ld r7,X+ + ld r10,X+ + ld r11,X+ + ld r14,X+ + ld r15,X+ + ld r20,X+ + ld r21,X+ + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ld r8,X+ + ld r9,X+ + ld r12,X+ + ld r13,X+ + ld r24,X+ + ld r25,X+ + ld r20,X+ + ld r21,X+ + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + std Y+31,r20 + std Y+32,r21 + ldi r16,10 + cpi r18,60 + cpc r19,r1 + brcs 56f + ldi r16,16 + ldi r17,4 + add r18,r17 + adc r19,r1 +56: + add r18,r16 + adc r19,r1 + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1233f +64: + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov 
r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r14,Y+17 + ldd r15,Y+18 + ldd r24,Y+19 + ldd r25,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r2,Y+25 + ldd r3,Y+26 + ldd r4,Y+27 + ldd r5,Y+28 + ldd r10,Y+29 + ldd r11,Y+30 + ldd r12,Y+31 + ldd r13,Y+32 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor r13,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd 
r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + eor r20,r10 + eor r21,r11 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + eor r20,r12 + eor r21,r13 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or 
r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd 
r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + eor r20,r14 + eor r21,r15 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + eor r20,r24 + eor r21,r25 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + movw r20,r14 + movw r22,r24 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r20 + movw r4,r22 + eor r2,r6 + eor r3,r7 + eor r4,r8 + eor r5,r9 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + 
and r0,r25 + eor r13,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + ret +1233: + rcall 64b + rcall 1239f + subi r16,2 + brne 1233b + rjmp 2560f +1239: + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 
+ and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+5 + ldd r21,Y+6 + eor r20,r2 + eor r21,r3 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + eor r20,r4 + eor r21,r5 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+3,r22 
+ std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r2 + movw r22,r4 + movw r2,r14 + movw r4,r24 + movw r14,r6 + movw r24,r8 + movw r6,r10 + movw r8,r12 + movw r10,r20 + movw r12,r22 + eor r10,r6 + eor r11,r7 + eor r12,r8 + eor r13,r9 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + 
mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + 
eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + eor r20,r10 + eor r21,r11 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + eor r20,r12 + eor r21,r13 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + 
and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ret +2560: + ldd r26,Y+33 + ldd r27,Y+34 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_decrypt_block, .-saturnin_decrypt_block + +#endif diff --git a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.c b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.c new file mode 100644 index 0000000..f4be50d --- /dev/null +++ 
b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.c @@ -0,0 +1,483 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "internal-saturnin.h" + +#if !defined(__AVR__) + +/* Round constants for various combinations of rounds and domain_sep */ +static uint32_t const saturnin_rc[] = { + /* RC_10_1 */ + 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, + 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d, + /* RC_10_2 */ + 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, + 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe, + /* RC_10_3 */ + 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, + 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4, + /* RC_10_4 */ + 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, + 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018, + /* RC_10_5 */ + 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, + 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752, + /* RC_10_6 */ + 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, + 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1, + /* RC_16_7 */ + 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, + 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, + 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, + 0x386d94d8, + /* RC_16_8 */ + 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, + 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, + 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, + 0x8eef9c3e +}; + +/* Loads a 32-bit word from the two halves of a 256-bit Saturnin input block */ +#define saturnin_load_word32(ptr) \ + ((((uint32_t)((ptr)[17])) << 24) | \ + (((uint32_t)((ptr)[16])) << 16) | \ + (((uint32_t)((ptr)[1])) << 8) | \ + ((uint32_t)((ptr)[0]))) + +/* Stores a 32-bit word to the two halves of a 256-bit Saturnin output block */ +#define saturnin_store_word32(ptr, x) \ + do { \ + (ptr)[0] = (uint8_t)(x); \ + (ptr)[1] = (uint8_t)((x) >> 8); \ + (ptr)[16] = (uint8_t)((x) >> 16); \ + (ptr)[17] = (uint8_t)((x) >> 24); \ + } while (0) + +/* Rotate the 4-bit nibbles within a 16-bit word left */ +#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ + do { \ + (a) = (((a) & (mask1)) << (bits1)) | \ + (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ + (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ + (((a) & 
(((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ + } while (0) + +/* Rotate 16-bit subwords left */ +#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ + do { \ + (a) = (((a) & (mask1)) << (bits1)) | \ + (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ + (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ + (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ + } while (0) + +/** + * \brief XOR the key into the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= ks->k[0]; \ + x1 ^= ks->k[1]; \ + x2 ^= ks->k[2]; \ + x3 ^= ks->k[3]; \ + x4 ^= ks->k[4]; \ + x5 ^= ks->k[5]; \ + x6 ^= ks->k[6]; \ + x7 ^= ks->k[7]; \ + } while (0) + +/** + * \brief XOR a rotated version of the key into the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_xor_key_rotated(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= ks->k[8]; \ + x1 ^= ks->k[9]; \ + x2 ^= ks->k[10]; \ + x3 ^= ks->k[11]; \ + x4 ^= ks->k[12]; \ + x5 ^= ks->k[13]; \ + x6 ^= ks->k[14]; \ + x7 ^= ks->k[15]; \ + } while (0) + +/** + * \brief Applies the Saturnin S-box to a bit-sliced set of nibbles. + * + * \param a First bit-slice. + * \param b Second bit-slice. + * \param c Third bit-slice. + * \param d Fourth bit-slice. + * + * The S-box also involves a rotation on the output words. We perform the + * rotation implicitly in the higher layers. + */ +#define saturnin_sbox(a, b, c, d) \ + do { \ + (a) ^= (b) & (c); \ + (b) ^= (a) | (d); \ + (d) ^= (b) | (c); \ + (c) ^= (b) & (d); \ + (b) ^= (a) | (c); \ + (a) ^= (b) | (d); \ + } while (0) + +/** + * \brief Applies the inverse of the Saturnin S-box to a set of nibbles. + * + * \param a First bit-slice. + * \param b Second bit-slice. + * \param c Third bit-slice. + * \param d Fourth bit-slice. + * + * The inverse of the S-box also involves a rotation on the input words. + * We perform the rotation implicitly in the higher layers. + */ +#define saturnin_sbox_inverse(a, b, c, d) \ + do { \ + (a) ^= (b) | (d); \ + (b) ^= (a) | (c); \ + (c) ^= (b) & (d); \ + (d) ^= (b) | (c); \ + (b) ^= (a) | (d); \ + (a) ^= (b) & (c); \ + } while (0) + +/* Helpers for MDS matrix operations */ +#define SWAP(a) (((a) << 16) | ((a) >> 16)) +#define MUL(x0, x1, x2, x3) \ + do { \ + temp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = temp ^ x0; \ + } while (0) +#define MULINV(x0, x1, x2, x3) \ + do { \ + temp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ temp; \ + } while (0) + +/** + * \brief Applies the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. 
+ * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_mds(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MUL(x4, x5, x6, x7); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + MUL(x0, x1, x2, x3); \ + MUL(x0, x1, x2, x3); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + } while (0) + +/** + * \brief Applies the inverse of the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_mds_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MULINV(x0, x1, x2, x3); \ + MULINV(x0, x1, x2, x3); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + MULINV(x4, x5, x6, x7); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + } while (0) + +/** + * \brief Applies the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x5, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x6, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x7, 0x7777U, 1, 0x1111, 3); \ + } while (0) + +/** + * \brief Applies the inverse of the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x5, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x6, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x7, 0x1111U, 3, 0x7777, 1); \ + } while (0) + +/** + * \brief Applies the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. 
+ * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x5, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x6, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x7, 0x0FFFU, 4, 0x000F, 12); \ + } while (0) + +/** + * \brief Applies the inverse of the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x5, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x6, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x7, 0x000FU, 12, 0x0FFF, 4); \ + } while (0) + +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key) +{ + int index; + uint32_t temp; + for (index = 0; index < 16; index += 2) { + temp = saturnin_load_word32(key + index); + ks->k[index / 2] = temp; + ks->k[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | + ((temp >> 5) & 0x07FF07FFU); + } +} + +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
8 : 5; + const uint32_t *rc = saturnin_rc + domain; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Perform all encryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc += 2) { + /* Even rounds */ + saturnin_sbox(x0, x1, x2, x3); + saturnin_sbox(x4, x5, x6, x7); + saturnin_mds(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox(x1, x2, x3, x0); + saturnin_sbox(x7, x5, x4, x6); + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + + /* Odd rounds */ + saturnin_sbox(x2, x3, x0, x1); + saturnin_sbox(x6, x5, x7, x4); + saturnin_mds(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox(x3, x0, x1, x2); + saturnin_sbox(x4, x5, x6, x7); + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + + /* Correct the rotation of the second half before the next round */ + temp = x4; + x4 = x7; + x7 = x6; + x6 = temp; + } + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
8 : 5; + const uint32_t *rc = saturnin_rc + domain + (rounds - 1) * 2; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* Perform all decryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc -= 2) { + /* Correct the rotation of the second half before the next round */ + temp = x6; + x6 = x7; + x7 = x4; + x4 = temp; + + /* Odd rounds */ + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sbox_inverse(x3, x0, x1, x2); + saturnin_sbox_inverse(x4, x5, x6, x7); + saturnin_mds_inverse(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox_inverse(x2, x3, x0, x1); + saturnin_sbox_inverse(x6, x5, x7, x4); + + /* Even rounds */ + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_sbox_inverse(x1, x2, x3, x0); + saturnin_sbox_inverse(x7, x5, x4, x6); + saturnin_mds_inverse(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox_inverse(x0, x1, x2, x3); + saturnin_sbox_inverse(x4, x5, x6, x7); + } + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +#endif /* !__AVR__ */ diff --git a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.h b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.h new file mode 100644 index 0000000..8af07c3 --- /dev/null +++ b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LW_INTERNAL_SATURNIN_H +#define LW_INTERNAL_SATURNIN_H + +/** + * \file internal-saturnin.h + * \brief Saturnin block cipher. + * + * References: https://project.inria.fr/saturnin/ + */ + +#include "internal-util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Size of a Saturnin block in bytes. + */ +#define SATURNIN_BLOCK_SIZE 32 + +/** + * \brief Domain separator index 1 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_1 0 + +/** + * \brief Domain separator index 2 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_2 10 + +/** + * \brief Domain separator index 3 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_3 20 + +/** + * \brief Domain separator index 4 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_4 30 + +/** + * \brief Domain separator index 5 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_5 40 + +/** + * \brief Domain separator index 6 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_6 50 + +/** + * \brief Domain separator index 7 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_7 60 + +/** + * \brief Domain separator index 8 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_8 76 + +/** + * \brief Structure of the key schedule for Saturnin. + */ +typedef struct +{ + /** Pre-computed round keys for Saturnin */ + uint32_t k[16]; + +} saturnin_key_schedule_t; + +/** + * \brief Sets up a key schedule for Saturnin. + * + * \param ks Points to the key schedule to initialize. + * \param key Points to the 32 bytes of the key data. + */ +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key); + +/** + * \brief Encrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place encryption. + */ +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +/** + * \brief Decrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place decryption. 
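+ *
+ * A minimal round-trip sketch for these entry points (the \a key,
+ * \a msg, \a ct and \a pt buffers are illustrative names, each
+ * assumed to hold 32 bytes):
+ *
+ * \code
+ * saturnin_key_schedule_t ks;
+ * unsigned char ct[SATURNIN_BLOCK_SIZE];
+ * unsigned char pt[SATURNIN_BLOCK_SIZE];
+ * saturnin_setup_key(&ks, key);
+ * saturnin_encrypt_block(&ks, ct, msg, SATURNIN_DOMAIN_10_1);
+ * saturnin_decrypt_block(&ks, pt, ct, SATURNIN_DOMAIN_10_1);
+ * // pt now equals msg; the same domain value must be used for both.
+ * \endcode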
+ */ +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/saturnin.c b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/saturnin.c index 734fc69..d2bd2cc 100644 --- a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/saturnin.c +++ b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/saturnin.c @@ -21,7 +21,7 @@ */ #include "saturnin.h" -#include "internal-util.h" +#include "internal-saturnin.h" #include aead_cipher_t const saturnin_cipher = { @@ -57,440 +57,22 @@ aead_hash_algorithm_t const saturnin_hash_algorithm = { 0 /* squeeze */ }; -/* Round constant tables for various combinations of rounds and domain_sep */ -static uint32_t const RC_10_1[] = { - 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, - 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d -}; -static uint32_t const RC_10_2[] = { - 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, - 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe -}; -static uint32_t const RC_10_3[] = { - 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, - 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4 -}; -static uint32_t const RC_10_4[] = { - 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, - 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018 -}; -static uint32_t const RC_10_5[] = { - 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, - 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752 -}; -static uint32_t const RC_10_6[] = { - 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, - 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1 -}; -static uint32_t const RC_16_7[] = { - 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, - 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, - 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, - 0x386d94d8 -}; -static uint32_t const RC_16_8[] = { - 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, - 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, - 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, - 0x8eef9c3e -}; - -/* Rotate the 4-bit nibbles within a 16-bit word left */ -#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ - } while (0) - -/* Rotate 16-bit subwords left */ -#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ - } while (0) - -/* XOR the SATURNIN state with the key */ -#define saturnin_xor_key() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index]; \ - } while (0) - -/* XOR the SATURNIN state with a rotated version of the key */ -#define saturnin_xor_key_rotated() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index + 8]; \ - } while (0) - -/* Apply an SBOX layer for SATURNIN - definition from the specification */ -#define 
S_LAYER(a, b, c, d) \ - do { \ - (a) ^= (b) & (c); \ - (b) ^= (a) | (d); \ - (d) ^= (b) | (c); \ - (c) ^= (b) & (d); \ - (b) ^= (a) | (c); \ - (a) ^= (b) | (d); \ - } while (0) - -/* Apply an SBOX layer for SATURNIN in reverse */ -#define S_LAYER_INVERSE(a, b, c, d) \ - do { \ - (a) ^= (b) | (d); \ - (b) ^= (a) | (c); \ - (c) ^= (b) & (d); \ - (d) ^= (b) | (c); \ - (b) ^= (a) | (d); \ - (a) ^= (b) & (c); \ - } while (0) - -/** - * \brief Applies the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - a = S[0]; b = S[1]; c = S[2]; d = S[3]; - S_LAYER(a, b, c, d); - S[0] = b; S[1] = c; S[2] = d; S[3] = a; - - /* PI_1 on the second half of the state */ - a = S[4]; b = S[5]; c = S[6]; d = S[7]; - S_LAYER(a, b, c, d); - S[4] = d; S[5] = b; S[6] = a; S[7] = c; -} - -/** - * \brief Applies the inverse of the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox_inverse(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - b = S[0]; c = S[1]; d = S[2]; a = S[3]; - S_LAYER_INVERSE(a, b, c, d); - S[0] = a; S[1] = b; S[2] = c; S[3] = d; - - /* PI_1 on the second half of the state */ - d = S[4]; b = S[5]; a = S[6]; c = S[7]; - S_LAYER_INVERSE(a, b, c, d); - S[4] = a; S[5] = b; S[6] = c; S[7] = d; -} - -/** - * \brief Applies the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the MDS matrix to the state */ - #define SWAP(a) (((a) << 16) | ((a) >> 16)) - #define MUL(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = tmp ^ x0; \ - } while (0) - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MUL(x4, x5, x6, x7, tmp); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - MUL(x0, x1, x2, x3, tmp); - MUL(x0, x1, x2, x3, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the inverse of the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds_inverse(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the inverse of the MDS matrix to the state */ - #define MULINV(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ tmp; \ - } while (0) - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MULINV(x0, x1, x2, x3, tmp); - MULINV(x0, x1, x2, x3, tmp); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - MULINV(x4, x5, x6, x7, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the slice permutation to the SATURNIN state. - * - * \param S The state. 
- */ -static void saturnin_slice(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[5], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[6], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[7], 0x7777U, 1, 0x1111, 3); -} - -/** - * \brief Applies the inverse of the slice permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_slice_inverse(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[5], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[6], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[7], 0x1111U, 3, 0x7777, 1); -} - -/** - * \brief Applies the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[5], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[6], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[7], 0x0FFFU, 4, 0x000F, 12); -} - -/** - * \brief Applies the inverse of the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet_inverse(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[5], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[6], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[7], 0x000FU, 12, 0x0FFF, 4); -} - -/** - * \brief Encrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Ciphertext output block, 32 bytes. - * \param input Plaintext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_decrypt() - */ -static void saturnin_block_encrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Perform all encryption rounds */ - for (; rounds > 0; rounds -= 2, RC += 2) { - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_slice(S); - saturnin_mds(S); - saturnin_slice_inverse(S); - S[0] ^= RC[0]; - saturnin_xor_key_rotated(); - - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_sheet(S); - saturnin_mds(S); - saturnin_sheet_inverse(S); - S[0] ^= RC[1]; - saturnin_xor_key(); - } - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - -/** - * \brief Decrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Plaintext output block, 32 bytes. - * \param input Ciphertext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_encrypt() - */ -static void saturnin_block_decrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* Perform all decryption rounds */ - RC += rounds - 2; - for (; rounds > 0; rounds -= 2, RC -= 2) { - saturnin_xor_key(); - S[0] ^= RC[1]; - saturnin_sheet(S); - saturnin_mds_inverse(S); - saturnin_sheet_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - - saturnin_xor_key_rotated(); - S[0] ^= RC[0]; - saturnin_slice(S); - saturnin_mds_inverse(S); - saturnin_slice_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - /** * \brief Encrypts a 256-bit block with the SATURNIN block cipher and * then XOR's itself to generate a new key. * * \param block Block to be encrypted and then XOR'ed with itself. * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. + * \param domain Domain separator and round counter. */ -void saturnin_block_encrypt_xor - (const unsigned char *block, unsigned char *key, - unsigned rounds, const uint32_t *RC) +static void saturnin_block_encrypt_xor + (const unsigned char *block, unsigned char *key, unsigned domain) { - unsigned char temp[32]; - saturnin_block_encrypt(temp, block, key, rounds, RC); - lw_xor_block_2_src(key, block, temp, 32); + saturnin_key_schedule_t ks; + unsigned char *temp = (unsigned char *)ks.k; /* Reuse some stack space */ + saturnin_setup_key(&ks, key); + saturnin_encrypt_block(&ks, temp, block, domain); + lw_xor_block_2_src(key, block, temp, SATURNIN_BLOCK_SIZE); } /** @@ -499,20 +81,20 @@ void saturnin_block_encrypt_xor * \param c Output ciphertext buffer. * \param m Input plaintext buffer. * \param mlen Length of the plaintext in bytes. - * \param k Points to the 32-byte key. + * \param ks Points to the key schedule. * \param block Points to the pre-formatted nonce block. */ static void saturnin_ctr_encrypt (unsigned char *c, const unsigned char *m, unsigned long long mlen, - const unsigned char *k, unsigned char *block) + const saturnin_key_schedule_t *ks, unsigned char *block) { /* Note: Specification requires a 95-bit counter but we only use 32-bit. * This limits the maximum packet size to 128Gb. 
That should be OK */ uint32_t counter = 1; - unsigned char out[32]; + unsigned char out[SATURNIN_BLOCK_SIZE]; while (mlen >= 32) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, 32); c += 32; m += 32; @@ -521,7 +103,7 @@ static void saturnin_ctr_encrypt } if (mlen > 0) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, (unsigned)mlen); } } @@ -533,18 +115,17 @@ static void saturnin_ctr_encrypt * \param block Temporary block of 32 bytes from the caller. * \param m Points to the message to be authenticated. * \param mlen Length of the message to be authenticated in bytes. - * \param rounds Number of rounds to perform. - * \param RC1 Round constants to use for domain separation on full blocks. - * \param RC2 Round constants to use for domain separation on the last block. + * \param domain1 Round count and domain separator for full blocks. + * \param domain2 Round count and domain separator for the last block. */ static void saturnin_authenticate (unsigned char *tag, unsigned char *block, const unsigned char *m, unsigned long long mlen, - unsigned rounds, const uint32_t *RC1, const uint32_t *RC2) + unsigned domain1, unsigned domain2) { unsigned temp; while (mlen >= 32) { - saturnin_block_encrypt_xor(m, tag, rounds, RC1); + saturnin_block_encrypt_xor(m, tag, domain1); m += 32; mlen -= 32; } @@ -552,7 +133,7 @@ static void saturnin_authenticate memcpy(block, m, temp); block[temp] = 0x80; memset(block + temp + 1, 0, 31 - temp); - saturnin_block_encrypt_xor(block, tag, rounds, RC2); + saturnin_block_encrypt_xor(block, tag, domain2); } int saturnin_aead_encrypt @@ -563,6 +144,7 @@ int saturnin_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char *tag; (void)nsec; @@ -576,17 +158,20 @@ int saturnin_aead_encrypt memset(block + 17, 0, 15); /* Encrypt the plaintext in counter mode to produce the ciphertext */ - saturnin_ctr_encrypt(c, m, mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(c, m, mlen, &ks, block); /* Set the counter back to zero and then encrypt the nonce */ tag = c + mlen; memcpy(tag, k, 32); memset(block + 17, 0, 15); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, c, mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); return 0; } @@ -598,6 +183,7 @@ int saturnin_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char tag[32]; (void)nsec; @@ -614,17 +200,20 @@ int saturnin_aead_decrypt /* Encrypt the nonce to initialize the authentication phase */ memcpy(tag, k, 32); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, 
c, *mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, *mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); /* Decrypt the ciphertext in counter mode to produce the plaintext */ memcpy(block, npub, 16); block[16] = 0x80; memset(block + 17, 0, 15); - saturnin_ctr_encrypt(m, c, *mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(m, c, *mlen, &ks, block); /* Check the authentication tag at the end of the message */ return aead_check_tag @@ -639,6 +228,7 @@ int saturnin_short_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned temp; (void)nsec; @@ -656,7 +246,8 @@ int saturnin_short_aead_encrypt memset(block + 17 + temp, 0, 15 - temp); /* Encrypt the input block to produce the output ciphertext */ - saturnin_block_encrypt(c, block, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_encrypt_block(&ks, c, block, SATURNIN_DOMAIN_10_6); *clen = 32; return 0; } @@ -669,6 +260,7 @@ int saturnin_short_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned check1, check2, len; int index, result; @@ -682,7 +274,8 @@ int saturnin_short_aead_decrypt return -1; /* Decrypt the ciphertext block */ - saturnin_block_decrypt(block, c, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_decrypt_block(&ks, block, c, SATURNIN_DOMAIN_10_6); /* Verify that the output block starts with the nonce and that it is * padded correctly. We need to do this very carefully to avoid leaking @@ -723,7 +316,8 @@ int saturnin_hash unsigned char tag[32]; unsigned char block[32]; memset(tag, 0, sizeof(tag)); - saturnin_authenticate(tag, block, in, inlen, 16, RC_16_7, RC_16_8); + saturnin_authenticate + (tag, block, in, inlen, SATURNIN_DOMAIN_16_7, SATURNIN_DOMAIN_16_8); memcpy(out, tag, 32); return 0; } @@ -752,12 +346,14 @@ void saturnin_hash_update state->s.count = 0; in += temp; inlen -= temp; - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_7); } /* Process full blocks that are aligned at state->s.count == 0 */ while (inlen >= 32) { - saturnin_block_encrypt_xor(in, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (in, state->s.hash, SATURNIN_DOMAIN_16_7); in += 32; inlen -= 32; } @@ -776,6 +372,7 @@ void saturnin_hash_finalize memset(state->s.block + state->s.count + 1, 0, 31 - state->s.count); /* Generate the final hash value */ - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_8); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_8); memcpy(out, state->s.hash, 32); } diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. 
* The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
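+ *
+ * Note: this macro also steps the 6-bit round constant in-place as
+ * rc = ((rc << 1) ^ ((rc >> 5) & 1) ^ ((rc >> 4) & 1) ^ 1) & 0x3F,
+ * XORing the low nibble into \a s0 and the high two bits into \a s1;
+ * a uint8_t \a rc is assumed to be visible in the enclosing scope.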
+ */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
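+ *
+ * Note: the round constant LFSR is stepped in reverse here, as
+ * rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20), undoing one
+ * forward step of skinny_128_384_round_tk_full(); a uint8_t \a rc
+ * is assumed to be visible in the enclosing scope.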
+ */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
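+ *
+ * Usage sketch (taken from skinny_128_256_encrypt() below, shown here
+ * for illustration): the caller rotates the state words between calls
+ * instead of swapping words inside the round, and alternates \a half
+ * between 0 and 1 to track which half of the tweakey is active.
+ * \code
+ * for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
+ *     skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+ *     skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+ *     skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+ *     skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
+ * }
+ * \endcode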
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+ do { \
+ /* Apply the S-box to all bytes in the state */ \
+ skinny128_sbox(s0); \
+ skinny128_sbox(s1); \
+ skinny128_sbox(s2); \
+ skinny128_sbox(s3); \
+ \
+ /* XOR the round constant and the subkey for this round */ \
+ rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+ rc &= 0x3F; \
+ s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+ s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+ s2 ^= 0x02; \
+ \
+ /* Shift the cells in the rows right, which moves the cell \
+ * values up closer to the MSB. That is, we do a left rotate \
+ * on the word to rotate the cells in the word right */ \
+ s1 = leftRotate8(s1); \
+ s2 = leftRotate16(s2); \
+ s3 = leftRotate24(s3); \
+ \
+ /* Mix the columns, but don't rotate the words yet */ \
+ s1 ^= s2; \
+ s2 ^= s0; \
+ s3 ^= s2; \
+ \
+ /* Permute TK1 and TK2 in-place for the next round */ \
+ skinny128_permute_tk_half \
+ (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+ skinny128_permute_tk_half \
+ (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+ skinny128_LFSR2(TK2[(1 - half) * 2]); \
+ skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+ } while (0)
+
void skinny_128_256_encrypt
(const skinny_128_256_key_schedule_t *ks, unsigned char *output,
const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
#else
const uint32_t *schedule = ks->k;
#endif
- uint32_t temp;
unsigned round;
/* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
TK2[3] = le_load_word32(ks->TK2 + 12);
#endif
- /* Perform all encryption rounds */
- for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
- /* Apply the S-box to all bytes in the state */
- skinny128_sbox(s0);
- skinny128_sbox(s1);
- skinny128_sbox(s2);
- skinny128_sbox(s3);
-
- /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
- rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
- rc &= 0x3F;
- s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
- s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
- s0 ^= schedule[0] ^ TK1[0];
- s1 ^= schedule[1] ^ TK1[1];
-#endif
- s2 ^= 0x02;
-
- /* Shift the cells in the rows right, which moves the cell
- * values up closer to the MSB. That is, we do a left rotate
- * on the word to rotate the cells in the word right */
- s1 = leftRotate8(s1);
- s2 = leftRotate16(s2);
- s3 = leftRotate24(s3);
-
- /* Mix the columns */
- s1 ^= s2;
- s2 ^= s0;
- temp = s3 ^ s2;
- s3 = s2;
- s2 = s1;
- s1 = s0;
- s0 = temp;
-
- /* Permute TK1 and TK2 for the next round */
- skinny128_permute_tk(TK1);
+ /* Perform all encryption rounds four at a time */
+ for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
#if SKINNY_128_SMALL_SCHEDULE
- skinny128_permute_tk(TK2);
- skinny128_LFSR2(TK2[0]);
- skinny128_LFSR2(TK2[1]);
+ skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+ skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+ skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+ skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
#else
- schedule += 2;
+ skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+ skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+ skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+ skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+ schedule += 8;
#endif
}
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
le_store_word32(output + 12, s3);
}
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
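+ /* Each word of TK2 is clocked by the LFSR only every second round
+  * in the half-permuted schedule, so ROUNDS / 2 applications below
+  * advance the tweakey to its end-of-schedule state */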
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
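+ *
+ * Invocation sketch (mirroring skinny_128_384_decrypt() below): the
+ * state words rotate in the opposite direction to encryption and the
+ * offsets count down from 3 to 0 so that the schedule pointer can be
+ * stepped back by 8 words after each group of four rounds.
+ * \code
+ * skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+ * skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+ * skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+ * skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+ * \endcode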
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
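+ /* Every word of TK2 and TK3 is clocked only on alternate rounds in
+  * the half-permuted schedule, so ROUNDS / 2 applications of each
+  * LFSR reproduce the end-of-schedule tweakey */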
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */
skinny128_LFSR2(TK2[0]);
skinny128_LFSR2(TK2[1]);
skinny128_LFSR2(TK2[2]);
@@ -263,50 +420,20 @@ void skinny_128_384_decrypt
}
#endif
- /* Perform all decryption rounds */
- for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) {
- /* Inverse permutation on TK1 for this round */
- skinny128_inv_permute_tk(TK1);
+ /* Perform all decryption rounds four at a time */
+ for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) {
#if SKINNY_128_SMALL_SCHEDULE
- skinny128_inv_permute_tk(TK2);
- skinny128_inv_permute_tk(TK3);
- skinny128_LFSR3(TK2[2]);
- skinny128_LFSR3(TK2[3]);
- skinny128_LFSR2(TK3[2]);
- skinny128_LFSR2(TK3[3]);
-#endif
-
- /* Inverse mix of the columns */
- temp = s3;
- s3 = s0;
- s0 = s1;
- s1 = s2;
- s3 ^= temp;
- s2 = temp ^ s0;
- s1 ^= s2;
-
- /* Inverse shift of the rows */
- s1 = leftRotate24(s1);
- s2 = leftRotate16(s2);
- s3 = leftRotate8(s3);
-
- /* Apply the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
- rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
- s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F);
- s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4);
+ skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1);
+ skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0);
+ skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1);
+ skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0);
#else
- s0 ^= schedule[0] ^ TK1[0];
- s1 ^= schedule[1] ^ TK1[1];
- schedule -= 2;
+ skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+ skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+ skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+ skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+ schedule -= 8;
#endif
- s2 ^= 0x02;
-
- /* Apply the inverse of the S-box to all bytes in the state */
- skinny128_inv_sbox(s0);
- skinny128_inv_sbox(s1);
- skinny128_inv_sbox(s2);
- skinny128_inv_sbox(s3);
}
/* Pack the result into the output buffer */
@@ -316,6 +443,57 @@ void skinny_128_384_decrypt
le_store_word32(output + 12, s3);
}
+/**
+ * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset)
+ * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2().
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state.
+ * \param s2 Third word of the state.
+ * \param s3 Fourth word of the state.
+ * \param half 0 for the bottom half and 1 for the top half of the TK values.
+ * \param offset Offset between 0 and 3 of the current unrolled round.
+ */
+#if SKINNY_128_SMALL_SCHEDULE
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+ skinny_128_384_round_tk_full(s0, s1, s2, s3, half)
+#else /* !SKINNY_128_SMALL_SCHEDULE */
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+ do { \
+ /* Apply the S-box to all bytes in the state */ \
+ skinny128_sbox(s0); \
+ skinny128_sbox(s1); \
+ skinny128_sbox(s2); \
+ skinny128_sbox(s3); \
+ \
+ /* XOR the round constant and the subkey for this round */ \
+ s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \
+ s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \
+ TK2[half * 2 + 1]; \
+ s2 ^= 0x02; \
+ \
+ /* Shift the cells in the rows right, which moves the cell \
+ * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
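+ *
+ * For example, a call with \a half set to 1 injects TK1[2..3] and
+ * TK2[2..3] into the state and then permutes TK1[0..1] and TK2[0..1]
+ * ready for the following bottom-half round:
+ * \code
+ * skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+ * \endcode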
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+ do { \
+ /* Apply the S-box to all bytes in the state */ \
+ skinny128_sbox(s0); \
+ skinny128_sbox(s1); \
+ skinny128_sbox(s2); \
+ skinny128_sbox(s3); \
+ \
+ /* XOR the round constant and the subkey for this round */ \
+ rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+ rc &= 0x3F; \
+ s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+ s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+ s2 ^= 0x02; \
+ \
+ /* Shift the cells in the rows right, which moves the cell \
+ * values up closer to the MSB. That is, we do a left rotate \
+ * on the word to rotate the cells in the word right */ \
+ s1 = leftRotate8(s1); \
+ s2 = leftRotate16(s2); \
+ s3 = leftRotate24(s3); \
+ \
+ /* Mix the columns, but don't rotate the words yet */ \
+ s1 ^= s2; \
+ s2 ^= s0; \
+ s3 ^= s2; \
+ \
+ /* Permute TK1 and TK2 in-place for the next round */ \
+ skinny128_permute_tk_half \
+ (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+ skinny128_permute_tk_half \
+ (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+ skinny128_LFSR2(TK2[(1 - half) * 2]); \
+ skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+ } while (0)
+
void skinny_128_256_encrypt
(const skinny_128_256_key_schedule_t *ks, unsigned char *output,
const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
#else
const uint32_t *schedule = ks->k;
#endif
- uint32_t temp;
unsigned round;
/* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
TK2[3] = le_load_word32(ks->TK2 + 12);
#endif
- /* Perform all encryption rounds */
- for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
- /* Apply the S-box to all bytes in the state */
- skinny128_sbox(s0);
- skinny128_sbox(s1);
- skinny128_sbox(s2);
- skinny128_sbox(s3);
-
- /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
- rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
- rc &= 0x3F;
- s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
- s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
- s0 ^= schedule[0] ^ TK1[0];
- s1 ^= schedule[1] ^ TK1[1];
-#endif
- s2 ^= 0x02;
-
- /* Shift the cells in the rows right, which moves the cell
- * values up closer to the MSB. That is, we do a left rotate
- * on the word to rotate the cells in the word right */
- s1 = leftRotate8(s1);
- s2 = leftRotate16(s2);
- s3 = leftRotate24(s3);
-
- /* Mix the columns */
- s1 ^= s2;
- s2 ^= s0;
- temp = s3 ^ s2;
- s3 = s2;
- s2 = s1;
- s1 = s0;
- s0 = temp;
-
- /* Permute TK1 and TK2 for the next round */
- skinny128_permute_tk(TK1);
+ /* Perform all encryption rounds four at a time */
+ for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
#if SKINNY_128_SMALL_SCHEDULE
- skinny128_permute_tk(TK2);
- skinny128_LFSR2(TK2[0]);
- skinny128_LFSR2(TK2[1]);
+ skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+ skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+ skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+ skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
#else
- schedule += 2;
+ skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+ skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+ skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+ skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+ schedule += 8;
#endif
}
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
le_store_word32(output + 12, s3);
}
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
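+ /* Stepping the loop by two matches the schedule above: each word of
+  * TK2 is clocked on alternate rounds, so ROUNDS / 2 LFSR steps
+  * fast-forward the whole tweakey */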
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
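+ *
+ * A sketch of one group of four inverse rounds, as issued by
+ * skinny_128_384_decrypt() below; note the descending schedule
+ * offsets and the reversed rotation of the state words:
+ * \code
+ * skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+ * skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+ * skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+ * skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+ * \endcode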
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
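+ /* As each word of TK2 and TK3 is clocked only every second round,
+  * ROUNDS / 2 applications of the LFSRs below bring the tweakey to
+  * its end-of-schedule value */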
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
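The fast-forward loop applies the LFSRs SKINNY_128_384_ROUNDS / 2 times to every word of TK2 and TK3. The reason is that each unrolled encryption round only steps the LFSR over the half of the tweakey it permutes, at indices (1 - half) * 2 and (1 - half) * 2 + 1, with `half` alternating 0,1,0,1,..., so each word is stepped once every second round. A standalone counting sketch (assuming SKINNY_128_384_ROUNDS is 56, as in internal-skinny128.h):

#include <assert.h>

#define ROUNDS 56 /* SKINNY_128_384_ROUNDS */

int main(void)
{
    int count[4] = {0, 0, 0, 0};
    int round;
    for (round = 0; round < ROUNDS; ++round) {
        int half = round & 1; /* the unrolled calls alternate half = 0,1,0,1 */
        /* each round steps the LFSR only on the half it permutes */
        count[(1 - half) * 2] += 1;
        count[(1 - half) * 2 + 1] += 1;
    }
    /* every word ends up stepped ROUNDS / 2 times, hence the fast-forward */
    assert(count[0] == ROUNDS / 2 && count[1] == ROUNDS / 2);
    assert(count[2] == ROUNDS / 2 && count[3] == ROUNDS / 2);
    return 0;
}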
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
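The two rc updates per iteration in the init loop above step the same 6-bit LFSR that the rolled loop stepped once per round. Driven on its own from rc = 0 it yields 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3E, ..., the round-constant sequence from the SKINNY specification; the low nibble is XORed into row 0 and the upper bits into row 1. A standalone generator (assuming SKINNY_128_256_ROUNDS is 48):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t rc = 0;
    int round;
    for (round = 1; round <= 48; ++round) { /* SKINNY_128_256_ROUNDS */
        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
        rc &= 0x3F;
        printf("round %2d: rc = 0x%02X (row0 ^= 0x%X, row1 ^= 0x%X)\n",
               round, rc, rc & 0x0F, rc >> 4);
    }
    return 0;
}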
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
#if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
    }
 
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
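Note that the inverse rounds step TK2 with skinny128_LFSR3 rather than skinny128_LFSR2: the two LFSRs are inverses of one another. The sketch below checks the round trip for every byte value; the word-sliced macro bodies are reproduced from internal-skinnyutil.h from memory, so treat them as an assumption and verify against the header in this tree.

#include <assert.h>
#include <stdint.h>

/* Assumed word-sliced LFSR definitions from internal-skinnyutil.h */
#define skinny128_LFSR2(x) \
    do { \
        uint32_t _x = (x); \
        (x) = ((_x << 1) & 0xFEFEFEFEU) ^ \
             (((_x >> 7) ^ (_x >> 5)) & 0x01010101U); \
    } while (0)

#define skinny128_LFSR3(x) \
    do { \
        uint32_t _x = (x); \
        (x) = ((_x >> 1) & 0x7F7F7F7FU) ^ \
              (((_x << 7) ^ (_x << 1)) & 0x80808080U); \
    } while (0)

int main(void)
{
    uint32_t b;
    for (b = 0; b < 256; ++b) {
        uint32_t x = b * 0x01010101U; /* replicate the byte across the word */
        uint32_t y = x;
        skinny128_LFSR2(y);
        skinny128_LFSR3(y); /* LFSR3 undoes LFSR2, and vice versa */
        assert(y == x);
    }
    return 0;
}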
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
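The inverse rounds undo the forward linear layer exactly, word rotation included. A standalone round-trip check, with forward() and inverse() transcribed from the rolled loops that this patch removes (the leftRotate* helpers replaced by a local rol32):

#include <assert.h>
#include <stdint.h>

#define rol32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

/* ShiftRows + MixColumns with the word rotation, from the old rolled
 * encryption loop */
static void forward(uint32_t s[4])
{
    uint32_t temp;
    s[1] = rol32(s[1], 8);
    s[2] = rol32(s[2], 16);
    s[3] = rol32(s[3], 24);
    s[1] ^= s[2];
    s[2] ^= s[0];
    temp = s[3] ^ s[2];
    s[3] = s[2];
    s[2] = s[1];
    s[1] = s[0];
    s[0] = temp;
}

/* The corresponding steps from the old rolled decryption loop */
static void inverse(uint32_t s[4])
{
    uint32_t temp = s[3];
    s[3] = s[0];
    s[0] = s[1];
    s[1] = s[2];
    s[3] ^= temp;
    s[2] = temp ^ s[0];
    s[1] ^= s[2];
    s[1] = rol32(s[1], 24);
    s[2] = rol32(s[2], 16);
    s[3] = rol32(s[3], 8);
}

int main(void)
{
    uint32_t s[4] = {0x01234567, 0x89ABCDEF, 0xFEDCBA98, 0x76543210};
    uint32_t t[4] = {s[0], s[1], s[2], s[3]};
    forward(t);
    inverse(t);
    assert(t[0] == s[0] && t[1] == s[1] && t[2] == s[2] && t[3] == s[3]);
    return 0;
}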
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
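The new half macro relates to the original skinny128_permute_tk in a simple way: the full macro computes the new rows 0-1 from the old rows 2-3 and swaps the halves, while the half macro computes the same values in place, leaving the logical halves swapped in memory; the round macros compensate by indexing through `half`. The sketch below checks that correspondence. The skinny128_permute_tk body is reproduced from internal-skinnyutil.h from memory and should be treated as an assumption; the half body is the one added by this patch.

#include <assert.h>
#include <stdint.h>

#define skinny128_permute_tk(tk) \
    do { \
        uint32_t row2 = tk[2]; \
        uint32_t row3 = tk[3]; \
        tk[2] = tk[0]; \
        tk[3] = tk[1]; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk[0] = ((row2 >>  8) & 0x000000FFU) | \
                ((row2 << 16) & 0x00FF0000U) | \
                ( row3        & 0xFF00FF00U); \
        tk[1] = ((row2 >> 16) & 0x000000FFU) | \
                 (row2        & 0xFF000000U) | \
                ((row3 <<  8) & 0x0000FF00U) | \
                ( row3        & 0x00FF0000U); \
    } while (0)

#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        uint32_t row2 = tk2; \
        uint32_t row3 = tk3; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk2 = ((row2 >>  8) & 0x000000FFU) | \
              ((row2 << 16) & 0x00FF0000U) | \
              ( row3        & 0xFF00FF00U); \
        tk3 = ((row2 >> 16) & 0x000000FFU) | \
               (row2        & 0xFF000000U) | \
              ((row3 <<  8) & 0x0000FF00U) | \
              ( row3        & 0x00FF0000U); \
    } while (0)

int main(void)
{
    uint32_t full[4] = {0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C};
    uint32_t half[4] = {0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C};
    skinny128_permute_tk(full);
    skinny128_permute_tk_half(half[2], half[3]);
    /* same words, but the half version leaves the halves swapped */
    assert(full[0] == half[2] && full[1] == half[3]);
    assert(full[2] == half[0] && full[3] == half[1]);
    return 0;
}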
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
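In the non-small-schedule path the schedule pointer now advances by eight words per group of four rounds while each unrolled round reads at offset * 2; that visits exactly the words the old schedule += 2 loop visited. A standalone index check (assuming SKINNY_128_384_ROUNDS is 56; the decryption loop is the mirror image with offsets 3..0 and schedule -= 8):

#include <assert.h>

#define ROUNDS 56 /* SKINNY_128_384_ROUNDS */

int main(void)
{
    int rolled = 0;        /* word index read by the old schedule += 2 loop */
    int unrolled_base = 0; /* word index of schedule[0] in the new loop */
    int round;
    for (round = 0; round < ROUNDS; ++round) {
        int offset = round % 4;
        /* the unrolled round reads schedule[offset * 2] and [offset * 2 + 1] */
        assert(rolled == unrolled_base + offset * 2);
        rolled += 2;
        if (offset == 3)
            unrolled_base += 8; /* schedule += 8 once per four rounds */
    }
    return 0;
}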
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
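The unrolled key-schedule loop above generates two rounds of material per iteration: it emits rows 0-1, half-permutes the bottom of TK2, emits the words that now hold the new rows 0-1, then half-permutes the top. The sketch below replays both the old rolled expansion and the new one (round constants omitted for clarity) and checks that they emit identical words. The skinny128_permute_tk body is recalled from internal-skinnyutil.h and is an assumption; the other two macros appear in this patch.

#include <assert.h>
#include <stdint.h>

#define skinny128_permute_tk(tk) \
    do { \
        uint32_t row2 = tk[2]; \
        uint32_t row3 = tk[3]; \
        tk[2] = tk[0]; \
        tk[3] = tk[1]; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk[0] = ((row2 >>  8) & 0x000000FFU) | \
                ((row2 << 16) & 0x00FF0000U) | \
                ( row3        & 0xFF00FF00U); \
        tk[1] = ((row2 >> 16) & 0x000000FFU) | \
                 (row2        & 0xFF000000U) | \
                ((row3 <<  8) & 0x0000FF00U) | \
                ( row3        & 0x00FF0000U); \
    } while (0)

#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        uint32_t row2 = tk2; \
        uint32_t row3 = tk3; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk2 = ((row2 >>  8) & 0x000000FFU) | \
              ((row2 << 16) & 0x00FF0000U) | \
              ( row3        & 0xFF00FF00U); \
        tk3 = ((row2 >> 16) & 0x000000FFU) | \
               (row2        & 0xFF000000U) | \
              ((row3 <<  8) & 0x0000FF00U) | \
              ( row3        & 0x00FF0000U); \
    } while (0)

#define skinny128_LFSR2(x) \
    do { \
        uint32_t _x = (x); \
        (x) = ((_x << 1) & 0xFEFEFEFEU) ^ \
             (((_x >> 7) ^ (_x >> 5)) & 0x01010101U); \
    } while (0)

int main(void)
{
    uint32_t a[4] = {0x11223344, 0x55667788, 0x99AABBCC, 0xDDEEFF00};
    uint32_t b[4] = {0x11223344, 0x55667788, 0x99AABBCC, 0xDDEEFF00};
    uint32_t sa[96], sb[96];
    int round;

    /* old expansion: one round per iteration, full permutation */
    for (round = 0; round < 48; ++round) {
        sa[round * 2] = a[0];
        sa[round * 2 + 1] = a[1];
        skinny128_permute_tk(a);
        skinny128_LFSR2(a[0]);
        skinny128_LFSR2(a[1]);
    }

    /* new expansion: two rounds per iteration, half permutations */
    for (round = 0; round < 48; round += 2) {
        sb[round * 2] = b[0];
        sb[round * 2 + 1] = b[1];
        skinny128_permute_tk_half(b[2], b[3]);
        skinny128_LFSR2(b[2]);
        skinny128_LFSR2(b[3]);
        sb[round * 2 + 2] = b[2];
        sb[round * 2 + 3] = b[3];
        skinny128_permute_tk_half(b[0], b[1]);
        skinny128_LFSR2(b[0]);
        skinny128_LFSR2(b[1]);
    }

    for (round = 0; round < 96; ++round)
        assert(sa[round] == sb[round]);
    return 0;
}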
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
#if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
    }
 
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
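skinny128_permute_tk_half and skinny128_inv_permute_tk_half invert one another, which is what lets the decryption macros unwind the tweakey in place. Both bodies below are copied from the internal-skinnyutil.h hunks in this patch; the round trip is a standalone check, not part of the patch.

#include <assert.h>
#include <stdint.h>

#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        uint32_t row2 = tk2; \
        uint32_t row3 = tk3; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk2 = ((row2 >>  8) & 0x000000FFU) | \
              ((row2 << 16) & 0x00FF0000U) | \
              ( row3        & 0xFF00FF00U); \
        tk3 = ((row2 >> 16) & 0x000000FFU) | \
               (row2        & 0xFF000000U) | \
              ((row3 <<  8) & 0x0000FF00U) | \
              ( row3        & 0x00FF0000U); \
    } while (0)

#define skinny128_inv_permute_tk_half(tk0, tk1) \
    do { \
        uint32_t row0 = tk0; \
        uint32_t row1 = tk1; \
        tk0 = ((row0 >> 16) & 0x000000FFU) | \
              ((row0 <<  8) & 0x0000FF00U) | \
              ((row1 << 16) & 0x00FF0000U) | \
              ( row1        & 0xFF000000U); \
        tk1 = ((row0 >> 16) & 0x0000FF00U) | \
              ((row0 << 16) & 0xFF000000U) | \
              ((row1 >> 16) & 0x000000FFU) | \
              ((row1 <<  8) & 0x00FF0000U); \
    } while (0)

int main(void)
{
    uint32_t a = 0xDEADBEEFU, b = 0x12345678U;
    uint32_t a0 = a, b0 = b;
    skinny128_permute_tk_half(a, b);
    skinny128_inv_permute_tk_half(a, b);
    assert(a == a0 && b == b0); /* the two halves invert one another */
    return 0;
}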
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
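The *_inv_round_tk_full macros defined just below step the 6-bit round-constant generator backwards with rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20). A quick standalone check that this exactly undoes the forward step used in the encryption macros, and that the rc seed values in the decrypt functions are one step past the last round's constant (rc_forward/rc_backward are just the macro expressions wrapped in functions for testing; the harness is illustrative, not part of the patch):

#include <assert.h>
#include <stdint.h>

static uint8_t rc_forward(uint8_t rc)
{
    rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
    return rc & 0x3F;
}

static uint8_t rc_backward(uint8_t rc)
{
    return (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
}

int main(void)
{
    uint8_t history[57];
    uint8_t rc = 0;
    int i;
    for (i = 0; i < 57; ++i)
        history[i] = rc = rc_forward(rc);
    /* The decrypt functions seed rc one step past the final round:
     * 0x15 after 56 rounds (Skinny-128-384) and 0x09 after 48 rounds
     * (Skinny-128-256), matching the initializers in this diff */
    assert(history[56] == 0x15);
    assert(history[48] == 0x09);
    /* Stepping backwards recovers every earlier constant */
    for (i = 55; i >= 0; --i) {
        rc = rc_backward(rc);
        assert(rc == history[i]);
    }
    return 0;
}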
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
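One detail worth calling out in the inverse rounds above: decryption applies skinny128_LFSR3 to TK2 and skinny128_LFSR2 to TK3, the opposite pairing from the forward schedule, because the two LFSRs invert each other. A standalone sketch (function forms of the word-sliced byte LFSRs from the SKINNY specification; illustrative, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Word-sliced versions of the spec's byte LFSRs: LFSR2 shifts each
 * byte left by one, LFSR3 shifts each byte right by one */
static uint32_t lfsr2(uint32_t x)
{
    return ((x << 1) & 0xFEFEFEFEU) ^
           (((x >> 7) ^ (x >> 5)) & 0x01010101U);
}

static uint32_t lfsr3(uint32_t x)
{
    return ((x >> 1) & 0x7F7F7F7FU) ^
           (((x << 7) ^ (x << 1)) & 0x80808080U);
}

int main(void)
{
    uint32_t x;
    /* The byte lanes are independent, so checking one lane over all
     * 256 values covers the whole word. LFSR3 undoes LFSR2 and vice
     * versa, which is why the inverse rounds run LFSR3 on TK2 and
     * LFSR2 on TK3 while decrypting */
    for (x = 0; x < 0x100; ++x) {
        assert(lfsr3(lfsr2(x)) == x);
        assert(lfsr2(lfsr3(x)) == x);
    }
    return 0;
}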
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
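A structural note on all of these round macros: the reference code's MixColumns ended with a word rotation through a temp variable, while the unrolled macros keep only the three XORs and let the call sites rotate the argument names instead (s0,s1,s2,s3 then s3,s0,s1,s2, and so on). A standalone sketch (helper functions and test values are illustrative, not from the patch) showing the two formulations agree after a group of four rounds:

#include <assert.h>
#include <stdint.h>

/* Reference mix: XORs plus an explicit rotation of the state words */
static void mix_ref(uint32_t s[4])
{
    uint32_t temp;
    s[1] ^= s[2];
    s[2] ^= s[0];
    temp = s[3] ^ s[2];
    s[3] = s[2];
    s[2] = s[1];
    s[1] = s[0];
    s[0] = temp;
}

/* Unrolled mix: XORs only; the rotation is absorbed by renaming the
 * words passed to the next round macro */
static void mix_unrolled(uint32_t *s0, uint32_t *s1,
                         uint32_t *s2, uint32_t *s3)
{
    *s1 ^= *s2;
    *s2 ^= *s0;
    *s3 ^= *s2;
}

int main(void)
{
    uint32_t a[4] = {0x11111111, 0x22222222, 0x44444444, 0x88888888};
    uint32_t b[4] = {0x11111111, 0x22222222, 0x44444444, 0x88888888};
    /* Four reference rounds bring the word order back to the start */
    mix_ref(a); mix_ref(a); mix_ref(a); mix_ref(a);
    /* The same four rounds with the argument rotation from the diff */
    mix_unrolled(&b[0], &b[1], &b[2], &b[3]);
    mix_unrolled(&b[3], &b[0], &b[1], &b[2]);
    mix_unrolled(&b[2], &b[3], &b[0], &b[1]);
    mix_unrolled(&b[1], &b[2], &b[3], &b[0]);
    assert(a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]);
    return 0;
}

After four calls the renaming cycle returns to the identity, which is why every rewritten loop advances in steps of four.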
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
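With four rounds per loop iteration, the precomputed schedule is consumed eight 32-bit words at a time, which is why the decryption pointer now starts at ks->k[SKINNY_128_256_ROUNDS * 2 - 8] and steps back by 8 while the unrolled calls use offsets 3 down to 0. A small indexing sketch (assuming the specification's 48 rounds for Skinny-128-256; illustrative, not part of the patch):

#include <assert.h>

#define ROUNDS 48 /* Skinny-128-256, per the specification */

int main(void)
{
    /* The schedule holds two 32-bit words per round. Decryption
     * starts at the last group of four rounds and walks offsets
     * 3,2,1,0 inside each group before stepping the base back */
    int base = ROUNDS * 2 - 8;
    int round, offset, expect = ROUNDS * 2 - 2;
    for (round = 0; round < ROUNDS; round += 4) {
        for (offset = 3; offset >= 0; --offset) {
            /* word pair consumed by one unrolled inverse round */
            assert(base + offset * 2 == expect);
            expect -= 2;
        }
        base -= 8;
    }
    /* Every pair of the old per-round walk was visited exactly once */
    assert(expect == -2 && base == -8);
    return 0;
}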
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
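One assumption shared by every loop rewritten in this patch is that the round counts stay multiples of four (the half-permutation trick separately needs them even); the specification's 56 rounds for Skinny-128-384 and 48 for Skinny-128-256 both qualify. A hypothetical compile-time guard, shown only as a sketch and not part of the patch (the negative-array-size typedef keeps it C89-friendly):

/* Fails to compile if a round count stops being a multiple of four */
typedef char skinny_128_384_rounds_are_multiple_of_4
    [(SKINNY_128_384_ROUNDS % 4 == 0) ? 1 : -1];
typedef char skinny_128_256_rounds_are_multiple_of_4
    [(SKINNY_128_256_ROUNDS % 4 == 0) ? 1 : -1];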
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
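As in the first copy of this file, the inverse rounds pair each tweakey half-permutation with the opposite LFSR (LFSR3 on TK2, LFSR2 on TK3) and mirror the encryption call pattern: where encryption runs halves 0,1,0,1 with offsets 0..3, decryption runs halves 1,0,1,0 with offsets 3..0, undoing the rounds in exactly the reverse order. A compact trace sketch (illustrative harness, not part of the patch) of that mirroring:

#include <assert.h>

int main(void)
{
    /* Encryption visits (half, offset) pairs in this order per group */
    static const int enc[4][2] = {{0, 0}, {1, 1}, {0, 2}, {1, 3}};
    /* Decryption, per the unrolled calls in the diff */
    static const int dec[4][2] = {{1, 3}, {0, 2}, {1, 1}, {0, 0}};
    int i;
    for (i = 0; i < 4; ++i) {
        /* Each decrypt step undoes the encrypt step at the mirrored
         * position within the group of four */
        assert(dec[i][0] == enc[3 - i][0]);
        assert(dec[i][1] == enc[3 - i][1]);
    }
    return 0;
}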
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
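The rc recurrence in these tk_full macros reproduces the round-constant table from the SKINNY specification, with the low nibble XORed into row 0 and the two high bits into row 1. A standalone spot check against the first sixteen published constants (the test harness is illustrative, not part of the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* First sixteen round constants from the SKINNY specification */
    static const uint8_t expected[16] = {
        0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3E, 0x3D, 0x3B,
        0x37, 0x2F, 0x1E, 0x3C, 0x39, 0x33, 0x27, 0x0E
    };
    uint8_t rc = 0;
    unsigned i;
    for (i = 0; i < 16; ++i) {
        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
        rc &= 0x3F;
        /* rc & 0x0F goes into row 0, rc >> 4 into row 1 */
        assert(rc == expected[i]);
    }
    return 0;
}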
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
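The fast-forward loop in the small-schedule decrypt path advances every word of TK2 by one LFSR2 step per two rounds, which matches the forward schedule, where each round steps only the half of TK2 that the next round will read. A counting sketch (illustrative; assumes the specification's 48 rounds for Skinny-128-256) confirming that both walks give every word the same number of LFSR steps:

#include <assert.h>

#define ROUNDS 48 /* Skinny-128-256, per the specification */

int main(void)
{
    int fwd[4] = {0, 0, 0, 0};
    int ffw[4] = {0, 0, 0, 0};
    int round, half = 0, w;

    /* Forward schedule: each round steps the LFSR on the half of TK2
     * that the next round will read */
    for (round = 0; round < ROUNDS; ++round) {
        fwd[(1 - half) * 2]++;
        fwd[(1 - half) * 2 + 1]++;
        half = 1 - half;
    }

    /* Decrypt fast-forward: one LFSR2 step on all four words per
     * two rounds, as in the loop above */
    for (round = 0; round < ROUNDS; round += 2)
        for (w = 0; w < 4; ++w)
            ffw[w]++;

    for (w = 0; w < 4; ++w)
        assert(fwd[w] == ffw[w] && fwd[w] == ROUNDS / 2);
    return 0;
}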
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinny128.c b/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
     }
 
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinny128.c b/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
    /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
     }
 
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook-avr.S b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook-avr.S new file mode 100644 index 0000000..05ca51e --- /dev/null +++ b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook-avr.S @@ -0,0 +1,4898 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global clyde128_encrypt + .type clyde128_encrypt, @function +clyde128_encrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + eor r10,r18 + eor r11,r19 + eor r12,r14 + eor r13,r15 + rcall 151f + ldi r27,1 + eor r20,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 635f + rjmp 725f +151: + movw r18,r20 + movw r14,r22 + and r18,r2 + and r19,r3 + and r14,r4 + and r15,r5 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + and r24,r20 + and r25,r21 + and r16,r22 + and r17,r23 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + and r10,r18 + and r11,r19 + and r12,r14 + and r13,r15 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 + movw r20,r24 + movw r22,r16 + movw r2,r18 + movw r4,r14 + mov r18,r21 + mov r19,r22 + mov r14,r23 + mov r15,r20 + eor r0,r0 + lsr r15 + ror r14 + ror r19 +
ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r3 + mov r25,r4 + mov r16,r5 + mov r17,r2 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r22 + mov r22,r20 + mov r20,r0 + mov r0,r23 + mov r23,r21 + mov r21,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + mov r0,r4 + mov r4,r2 + mov r2,r0 + mov r0,r5 + mov r5,r3 + mov r3,r0 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r2,r24 + eor r3,r25 + eor r4,r16 + eor r5,r17 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r20,r14 + eor r21,r15 + eor r22,r18 + eor r23,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r2,r16 + eor r3,r17 + eor r4,r24 + eor r5,r25 + mov r18,r7 + mov r19,r8 + mov r14,r9 + mov r15,r6 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r11 + mov r25,r12 + mov r16,r13 + mov r17,r10 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror 
r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r8 + mov r8,r6 + mov r6,r0 + mov r0,r9 + mov r9,r7 + mov r7,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r6,r14 + eor r7,r15 + eor r8,r18 + eor r9,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r10,r16 + eor r11,r17 + eor r12,r24 + eor r13,r25 + ret +635: + ldd r18,Y+9 + ldd r19,Y+10 + ldd r14,Y+11 + ldd r15,Y+12 + ldd r24,Y+1 + ldd r25,Y+2 + ldd r16,Y+3 + ldd r17,Y+4 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+9,r24 + std Y+10,r25 + std Y+11,r16 + std Y+12,r17 + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r14,Y+15 + ldd r15,Y+16 + ldd r24,Y+5 + ldd r25,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+13,r24 + std Y+14,r25 + std Y+15,r16 + std Y+16,r17 + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ret +725: + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_encrypt, .-clyde128_encrypt + + .text +.global clyde128_decrypt + .type clyde128_decrypt, @function +clyde128_decrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 
0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + rcall 103f + rjmp 623f +103: + mov r18,r23 + mov r19,r20 + mov r14,r21 + mov r15,r22 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r5 + mov r25,r2 + mov r16,r3 + mov r17,r4 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r20,r15 + eor r21,r18 + eor r22,r19 + eor r23,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r2,r17 + eor r3,r24 + eor r4,r25 + eor r5,r16 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r18,r22 + eor r19,r23 + eor r14,r20 + eor r15,r21 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r24,r4 + eor r25,r5 + eor r16,r2 + 
eor r17,r3 + movw r20,r14 + movw r22,r18 + movw r2,r16 + movw r4,r24 + mov r18,r9 + mov r19,r6 + mov r14,r7 + mov r15,r8 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r13 + mov r25,r10 + mov r16,r11 + mov r17,r12 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r6,r15 + eor r7,r18 + eor r8,r19 + eor r9,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r10,r17 + eor r11,r24 + eor r12,r25 + eor r13,r16 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r18,r8 + eor r19,r9 + eor r14,r6 + eor r15,r7 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r24,r12 + eor r25,r13 + eor r16,r10 + eor r17,r11 + movw r6,r14 + movw r8,r18 + movw r10,r16 + movw r12,r24 + movw r24,r20 + movw r16,r22 + and r24,r2 + and r25,r3 + and r16,r4 + and r17,r5 + eor r24,r6 + eor r25,r7 + eor r16,r8 + eor r17,r9 + movw r18,r2 + movw r14,r4 + and r18,r24 + and r19,r25 + and r14,r16 + and r15,r17 + eor r18,r10 + eor r19,r11 + eor r14,r12 + eor r15,r13 + movw r10,r24 + movw r12,r16 + and r24,r18 + and r25,r19 + and r16,r14 + and r17,r15 + eor r24,r20 + eor r25,r21 + eor r16,r22 + eor r17,r23 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r20,r18 + movw r22,r14 + movw r2,r24 + movw r4,r16 + ret +533: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r14,Y+3 + ldd r15,Y+4 + ldd r24,Y+9 + ldd r25,Y+10 + ldd r16,Y+11 + ldd r17,Y+12 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + 
eor r9,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+1,r24 + std Y+2,r25 + std Y+3,r16 + std Y+4,r17 + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r14,Y+7 + ldd r15,Y+8 + ldd r24,Y+13 + ldd r25,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+5,r24 + std Y+6,r25 + std Y+7,r16 + std Y+8,r17 + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + ret +623: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r0,Y+1 + eor r20,r0 + ldd r0,Y+2 + eor r21,r0 + ldd r0,Y+3 + eor r22,r0 + ldd r0,Y+4 + eor r23,r0 + ldd r0,Y+5 + eor r2,r0 + ldd r0,Y+6 + eor r3,r0 + ldd r0,Y+7 + eor r4,r0 + ldd r0,Y+8 + eor r5,r0 + ldd r0,Y+9 + eor r6,r0 + ldd r0,Y+10 + eor r7,r0 + ldd r0,Y+11 + eor r8,r0 + ldd r0,Y+12 + eor r9,r0 + ldd r0,Y+13 + eor r10,r0 + ldd r0,Y+14 + eor r11,r0 + ldd r0,Y+15 + eor r12,r0 + ldd r0,Y+16 + eor r13,r0 + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_decrypt, .-clyde128_decrypt + + .text +.global shadow512 + .type shadow512, @function +shadow512: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi 
r25,4 + eor r18,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+60 + ldi r16,8 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd 
r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd 
r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+48 + ldi r16,8 + eor r18,r16 + std Z+48,r18 + ldd r18,Z+52 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r28,r25 + 
rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + rjmp 1795f +1083: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and 
r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +1127: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + 
or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1553: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + ldd r4,Z+48 + ldd r5,Z+49 + ldd r6,Z+50 + ldd r7,Z+51 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+32,r4 + std Z+33,r5 + std Z+34,r6 + std Z+35,r7 + std Z+48,r28 + std Z+49,r29 + std Z+50,r2 + std Z+51,r3 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + ldd r4,Z+52 + ldd r5,Z+53 + ldd r6,Z+54 + ldd r7,Z+55 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + std Z+36,r4 + std Z+37,r5 + std Z+38,r6 + std Z+39,r7 + std Z+52,r28 + std Z+53,r29 + std Z+54,r2 + std Z+55,r3 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd 
r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+56 + ldd r5,Z+57 + ldd r6,Z+58 + ldd r7,Z+59 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + std Z+40,r4 + std Z+41,r5 + std Z+42,r6 + std Z+43,r7 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + std Z+60,r28 + std Z+61,r29 + std Z+62,r2 + std Z+63,r3 + ret +1795: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow512, .-shadow512 + + .text +.global shadow384 + .type shadow384, @function +shadow384: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std 
Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std 
Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + 
std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + rjmp 1430f +814: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 
+ movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +858: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 
+ ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1284: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+32,r22 + std Z+33,r23 + std Z+34,r26 + std Z+35,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+40,r22 + std Z+41,r23 + std Z+42,r26 + std Z+43,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+44,r22 + std Z+45,r23 + std Z+46,r26 + std Z+47,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ret +1430: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 
+ pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow384, .-shadow384 + +#endif diff --git a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.c b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.c index 0e19216..068938b 100644 --- a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.c +++ b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.c @@ -22,6 +22,8 @@ #include "internal-spook.h" +#if !defined(__AVR__) + /** * \brief Number of steps in the Clyde-128 block cipher. * @@ -43,9 +45,9 @@ static uint8_t const rc[CLYDE128_STEPS][8] = { }; void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]) + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -154,9 +156,9 @@ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], } void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]) + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -555,3 +557,5 @@ void shadow384(shadow384_state_t *state) le_store_word32(state->B + 44, s23); #endif } + +#endif /* !__AVR__ */ diff --git a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.h b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.h index b08ce80..77c8b86 100644 --- a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.h +++ b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.h @@ -93,31 +93,31 @@ typedef union * \brief Encrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to encrypt with. - * \param tweak Points to the tweak to encrypt with. * \param output Output buffer for the ciphertext. * \param input Input buffer for the plaintext. + * \param tweak Points to the tweak to encrypt with. * * \sa clyde128_decrypt() */ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]); + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Decrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to decrypt with. - * \param tweak Points to the tweak to decrypt with. * \param output Output buffer for the plaintext. * \param input Input buffer for the ciphertext. + * \param tweak Points to the tweak to decrypt with. * * \sa clyde128_encrypt() */ void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]); + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Performs the Shadow-512 permutation on a state. 
diff --git a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/spook.c b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/spook.c index d075b33..2dbab94 100644 --- a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/spook.c +++ b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/spook.c @@ -86,7 +86,7 @@ static void spook_128_512_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 12, state->W + 4); + clyde128_encrypt(k, state->W + 12, state->W + 4, state->W); shadow512(state); } @@ -111,7 +111,7 @@ static void spook_128_384_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 8, state->W + 4); + clyde128_encrypt(k, state->W + 8, state->W + 4, state->W); shadow384(state); } @@ -310,7 +310,7 @@ int spook_128_512_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -345,7 +345,7 @@ int spook_128_512_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -377,7 +377,7 @@ int spook_128_384_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -412,7 +412,7 @@ int spook_128_384_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -444,7 +444,7 @@ int spook_128_512_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -479,7 +479,7 @@ int spook_128_512_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -511,7 +511,7 @@ int spook_128_384_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -546,7 +546,7 @@ int spook_128_384_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } diff --git 
a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook-avr.S b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook-avr.S new file mode 100644 index 0000000..05ca51e --- /dev/null +++ b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook-avr.S @@ -0,0 +1,4898 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global clyde128_encrypt + .type clyde128_encrypt, @function +clyde128_encrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + eor r10,r18 + eor r11,r19 + eor r12,r14 + eor r13,r15 + rcall 151f + ldi r27,1 + eor r20,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 635f + rjmp 725f +151: + movw r18,r20 + movw r14,r22 + and r18,r2 + and r19,r3 + and r14,r4 + and r15,r5 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + and r24,r20 + and r25,r21 + and r16,r22 + and r17,r23 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + and r10,r18 + and r11,r19 + and r12,r14 + and r13,r15 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 + movw r20,r24 + movw r22,r16 + movw r2,r18 + movw r4,r14 + mov r18,r21 + mov r19,r22 + mov r14,r23 + mov r15,r20 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr 
r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r3 + mov r25,r4 + mov r16,r5 + mov r17,r2 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r22 + mov r22,r20 + mov r20,r0 + mov r0,r23 + mov r23,r21 + mov r21,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + mov r0,r4 + mov r4,r2 + mov r2,r0 + mov r0,r5 + mov r5,r3 + mov r3,r0 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r2,r24 + eor r3,r25 + eor r4,r16 + eor r5,r17 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r20,r14 + eor r21,r15 + eor r22,r18 + eor r23,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r2,r16 + eor r3,r17 + eor r4,r24 + eor r5,r25 + mov r18,r7 + mov r19,r8 + mov r14,r9 + mov r15,r6 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r11 + mov r25,r12 + mov r16,r13 + mov r17,r10 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor 
r17,r0 + mov r0,r8 + mov r8,r6 + mov r6,r0 + mov r0,r9 + mov r9,r7 + mov r7,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r6,r14 + eor r7,r15 + eor r8,r18 + eor r9,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r10,r16 + eor r11,r17 + eor r12,r24 + eor r13,r25 + ret +635: + ldd r18,Y+9 + ldd r19,Y+10 + ldd r14,Y+11 + ldd r15,Y+12 + ldd r24,Y+1 + ldd r25,Y+2 + ldd r16,Y+3 + ldd r17,Y+4 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+9,r24 + std Y+10,r25 + std Y+11,r16 + std Y+12,r17 + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r14,Y+15 + ldd r15,Y+16 + ldd r24,Y+5 + ldd r25,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+13,r24 + std Y+14,r25 + std Y+15,r16 + std Y+16,r17 + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ret +725: + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_encrypt, .-clyde128_encrypt + + .text +.global clyde128_decrypt + .type clyde128_decrypt, @function +clyde128_decrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld 
r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + rcall 103f + rjmp 623f +103: + mov r18,r23 + mov r19,r20 + mov r14,r21 + mov r15,r22 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r5 + mov r25,r2 + mov r16,r3 + mov r17,r4 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r20,r15 + eor r21,r18 + eor r22,r19 + eor r23,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r2,r17 + eor r3,r24 + eor r4,r25 + eor r5,r16 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r18,r22 + eor r19,r23 + eor r14,r20 + eor r15,r21 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r24,r4 + eor r25,r5 + eor r16,r2 + eor r17,r3 + movw r20,r14 + movw r22,r18 + movw r2,r16 + movw r4,r24 + mov 
r18,r9 + mov r19,r6 + mov r14,r7 + mov r15,r8 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r13 + mov r25,r10 + mov r16,r11 + mov r17,r12 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r6,r15 + eor r7,r18 + eor r8,r19 + eor r9,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r10,r17 + eor r11,r24 + eor r12,r25 + eor r13,r16 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r18,r8 + eor r19,r9 + eor r14,r6 + eor r15,r7 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r24,r12 + eor r25,r13 + eor r16,r10 + eor r17,r11 + movw r6,r14 + movw r8,r18 + movw r10,r16 + movw r12,r24 + movw r24,r20 + movw r16,r22 + and r24,r2 + and r25,r3 + and r16,r4 + and r17,r5 + eor r24,r6 + eor r25,r7 + eor r16,r8 + eor r17,r9 + movw r18,r2 + movw r14,r4 + and r18,r24 + and r19,r25 + and r14,r16 + and r15,r17 + eor r18,r10 + eor r19,r11 + eor r14,r12 + eor r15,r13 + movw r10,r24 + movw r12,r16 + and r24,r18 + and r25,r19 + and r16,r14 + and r17,r15 + eor r24,r20 + eor r25,r21 + eor r16,r22 + eor r17,r23 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r20,r18 + movw r22,r14 + movw r2,r24 + movw r4,r16 + ret +533: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r14,Y+3 + ldd r15,Y+4 + ldd r24,Y+9 + ldd r25,Y+10 + ldd r16,Y+11 + ldd r17,Y+12 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std 
Y+1,r24 + std Y+2,r25 + std Y+3,r16 + std Y+4,r17 + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r14,Y+7 + ldd r15,Y+8 + ldd r24,Y+13 + ldd r25,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+5,r24 + std Y+6,r25 + std Y+7,r16 + std Y+8,r17 + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + ret +623: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r0,Y+1 + eor r20,r0 + ldd r0,Y+2 + eor r21,r0 + ldd r0,Y+3 + eor r22,r0 + ldd r0,Y+4 + eor r23,r0 + ldd r0,Y+5 + eor r2,r0 + ldd r0,Y+6 + eor r3,r0 + ldd r0,Y+7 + eor r4,r0 + ldd r0,Y+8 + eor r5,r0 + ldd r0,Y+9 + eor r6,r0 + ldd r0,Y+10 + eor r7,r0 + ldd r0,Y+11 + eor r8,r0 + ldd r0,Y+12 + eor r9,r0 + ldd r0,Y+13 + eor r10,r0 + ldd r0,Y+14 + eor r11,r0 + ldd r0,Y+15 + eor r12,r0 + ldd r0,Y+16 + eor r13,r0 + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_decrypt, .-clyde128_decrypt + + .text +.global shadow512 + .type shadow512, @function +shadow512: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std 
Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+60 + ldi r16,8 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 
1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+48 + ldi r16,8 + eor r18,r16 + std Z+48,r18 + ldd r18,Z+52 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + 
std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + rjmp 1795f +1083: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 
+ movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +1127: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 
+ movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1553: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + ldd r4,Z+48 + ldd r5,Z+49 + ldd r6,Z+50 + ldd r7,Z+51 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+32,r4 + std Z+33,r5 + std Z+34,r6 + std Z+35,r7 + std Z+48,r28 + std Z+49,r29 + std Z+50,r2 + std Z+51,r3 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + ldd r4,Z+52 + ldd r5,Z+53 + ldd r6,Z+54 + ldd r7,Z+55 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + std Z+36,r4 + std Z+37,r5 + std Z+38,r6 + std Z+39,r7 + std Z+52,r28 + std Z+53,r29 + std Z+54,r2 + std Z+55,r3 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+56 + ldd r5,Z+57 + ldd r6,Z+58 + ldd r7,Z+59 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + std Z+40,r4 + std Z+41,r5 + std Z+42,r6 + std Z+43,r7 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + std Z+60,r28 + std Z+61,r29 + std Z+62,r2 + std Z+63,r3 + ret +1795: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow512, .-shadow512 + + .text +.global shadow384 + .type shadow384, @function +shadow384: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd 
r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + rjmp 1430f +814: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +858: + mov r8,r19 + mov 
r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor 
r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1284: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+32,r22 + std Z+33,r23 + std Z+34,r26 + std Z+35,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+40,r22 + std Z+41,r23 + std Z+42,r26 + std Z+43,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+44,r22 + std Z+45,r23 + std Z+46,r26 + std Z+47,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ret +1430: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size 
shadow384, .-shadow384 + +#endif diff --git a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.c b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.c index 0e19216..068938b 100644 --- a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.c +++ b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.c @@ -22,6 +22,8 @@ #include "internal-spook.h" +#if !defined(__AVR__) + /** * \brief Number of steps in the Clyde-128 block cipher. * @@ -43,9 +45,9 @@ static uint8_t const rc[CLYDE128_STEPS][8] = { }; void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]) + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -154,9 +156,9 @@ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], } void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]) + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -555,3 +557,5 @@ void shadow384(shadow384_state_t *state) le_store_word32(state->B + 44, s23); #endif } + +#endif /* !__AVR__ */ diff --git a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.h b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.h index b08ce80..77c8b86 100644 --- a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.h +++ b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.h @@ -93,31 +93,31 @@ typedef union * \brief Encrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to encrypt with. - * \param tweak Points to the tweak to encrypt with. * \param output Output buffer for the ciphertext. * \param input Input buffer for the plaintext. + * \param tweak Points to the tweak to encrypt with. * * \sa clyde128_decrypt() */ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]); + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Decrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to decrypt with. - * \param tweak Points to the tweak to decrypt with. * \param output Output buffer for the plaintext. * \param input Input buffer for the ciphertext. + * \param tweak Points to the tweak to decrypt with. * * \sa clyde128_encrypt() */ void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]); + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Performs the Shadow-512 permutation on a state. 
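Note on the hunks above: they move the tweak argument of clyde128_encrypt() and clyde128_decrypt() from second to last position, so both prototypes now read (key, output, input, tweak), and the new #if !defined(__AVR__) guard around internal-spook.c lets the generated internal-spook-avr.S supply clyde128_encrypt, clyde128_decrypt, shadow512 and shadow384 on AVR targets while other platforms keep the portable C code. A minimal sketch of a caller under the reordered API follows; the round-trip helper, buffer names and zero initializers are illustrative only and are not part of the patch:

#include <stdint.h>
#include <string.h>
#include "internal-spook.h"

/* Illustrative smoke test (not from the patch): encrypt one block with
 * Clyde-128 under the new argument order, decrypt it, and compare. */
static int clyde128_roundtrip(void)
{
    unsigned char key[CLYDE128_KEY_SIZE] = {0};
    uint32_t tweak[CLYDE128_TWEAK_SIZE / 4] = {0};
    const uint32_t pt[CLYDE128_BLOCK_SIZE / 4] = {1, 2, 3, 4};
    uint32_t ct[CLYDE128_BLOCK_SIZE / 4];
    uint32_t out[CLYDE128_BLOCK_SIZE / 4];

    /* New order: key, output, input, tweak. */
    clyde128_encrypt(key, ct, pt, tweak);
    /* clyde128_decrypt() takes its ciphertext input as bytes. */
    clyde128_decrypt(key, out, (const unsigned char *)ct, tweak);

    return memcmp(out, pt, sizeof(pt)) == 0; /* 1 on success */
}

Placing the tweak last makes every call site read as (key, dst, src, tweak), which is the pattern visible in the spook.c hunks that follow.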
diff --git a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/spook.c b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/spook.c index d075b33..2dbab94 100644 --- a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/spook.c +++ b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/spook.c @@ -86,7 +86,7 @@ static void spook_128_512_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 12, state->W + 4); + clyde128_encrypt(k, state->W + 12, state->W + 4, state->W); shadow512(state); } @@ -111,7 +111,7 @@ static void spook_128_384_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 8, state->W + 4); + clyde128_encrypt(k, state->W + 8, state->W + 4, state->W); shadow384(state); } @@ -310,7 +310,7 @@ int spook_128_512_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -345,7 +345,7 @@ int spook_128_512_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -377,7 +377,7 @@ int spook_128_384_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -412,7 +412,7 @@ int spook_128_384_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -444,7 +444,7 @@ int spook_128_512_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -479,7 +479,7 @@ int spook_128_512_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -511,7 +511,7 @@ int spook_128_384_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -546,7 +546,7 @@ int spook_128_384_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } diff --git 
a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook-avr.S b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook-avr.S new file mode 100644 index 0000000..05ca51e --- /dev/null +++ b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook-avr.S @@ -0,0 +1,4898 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global clyde128_encrypt + .type clyde128_encrypt, @function +clyde128_encrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + eor r10,r18 + eor r11,r19 + eor r12,r14 + eor r13,r15 + rcall 151f + ldi r27,1 + eor r20,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 635f + rjmp 725f +151: + movw r18,r20 + movw r14,r22 + and r18,r2 + and r19,r3 + and r14,r4 + and r15,r5 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + and r24,r20 + and r25,r21 + and r16,r22 + and r17,r23 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + and r10,r18 + and r11,r19 + and r12,r14 + and r13,r15 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 + movw r20,r24 + movw r22,r16 + movw r2,r18 + movw r4,r14 + mov r18,r21 + mov r19,r22 + mov r14,r23 + mov r15,r20 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr 
r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r3 + mov r25,r4 + mov r16,r5 + mov r17,r2 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r22 + mov r22,r20 + mov r20,r0 + mov r0,r23 + mov r23,r21 + mov r21,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + mov r0,r4 + mov r4,r2 + mov r2,r0 + mov r0,r5 + mov r5,r3 + mov r3,r0 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r2,r24 + eor r3,r25 + eor r4,r16 + eor r5,r17 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r20,r14 + eor r21,r15 + eor r22,r18 + eor r23,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r2,r16 + eor r3,r17 + eor r4,r24 + eor r5,r25 + mov r18,r7 + mov r19,r8 + mov r14,r9 + mov r15,r6 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r11 + mov r25,r12 + mov r16,r13 + mov r17,r10 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor 
r17,r0 + mov r0,r8 + mov r8,r6 + mov r6,r0 + mov r0,r9 + mov r9,r7 + mov r7,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r6,r14 + eor r7,r15 + eor r8,r18 + eor r9,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r10,r16 + eor r11,r17 + eor r12,r24 + eor r13,r25 + ret +635: + ldd r18,Y+9 + ldd r19,Y+10 + ldd r14,Y+11 + ldd r15,Y+12 + ldd r24,Y+1 + ldd r25,Y+2 + ldd r16,Y+3 + ldd r17,Y+4 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+9,r24 + std Y+10,r25 + std Y+11,r16 + std Y+12,r17 + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r14,Y+15 + ldd r15,Y+16 + ldd r24,Y+5 + ldd r25,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+13,r24 + std Y+14,r25 + std Y+15,r16 + std Y+16,r17 + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ret +725: + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_encrypt, .-clyde128_encrypt + + .text +.global clyde128_decrypt + .type clyde128_decrypt, @function +clyde128_decrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld 
r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + rcall 103f + rjmp 623f +103: + mov r18,r23 + mov r19,r20 + mov r14,r21 + mov r15,r22 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r5 + mov r25,r2 + mov r16,r3 + mov r17,r4 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r20,r15 + eor r21,r18 + eor r22,r19 + eor r23,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r2,r17 + eor r3,r24 + eor r4,r25 + eor r5,r16 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r18,r22 + eor r19,r23 + eor r14,r20 + eor r15,r21 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r24,r4 + eor r25,r5 + eor r16,r2 + eor r17,r3 + movw r20,r14 + movw r22,r18 + movw r2,r16 + movw r4,r24 + mov 
r18,r9 + mov r19,r6 + mov r14,r7 + mov r15,r8 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r13 + mov r25,r10 + mov r16,r11 + mov r17,r12 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r6,r15 + eor r7,r18 + eor r8,r19 + eor r9,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r10,r17 + eor r11,r24 + eor r12,r25 + eor r13,r16 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r18,r8 + eor r19,r9 + eor r14,r6 + eor r15,r7 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r24,r12 + eor r25,r13 + eor r16,r10 + eor r17,r11 + movw r6,r14 + movw r8,r18 + movw r10,r16 + movw r12,r24 + movw r24,r20 + movw r16,r22 + and r24,r2 + and r25,r3 + and r16,r4 + and r17,r5 + eor r24,r6 + eor r25,r7 + eor r16,r8 + eor r17,r9 + movw r18,r2 + movw r14,r4 + and r18,r24 + and r19,r25 + and r14,r16 + and r15,r17 + eor r18,r10 + eor r19,r11 + eor r14,r12 + eor r15,r13 + movw r10,r24 + movw r12,r16 + and r24,r18 + and r25,r19 + and r16,r14 + and r17,r15 + eor r24,r20 + eor r25,r21 + eor r16,r22 + eor r17,r23 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r20,r18 + movw r22,r14 + movw r2,r24 + movw r4,r16 + ret +533: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r14,Y+3 + ldd r15,Y+4 + ldd r24,Y+9 + ldd r25,Y+10 + ldd r16,Y+11 + ldd r17,Y+12 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std 
Y+1,r24 + std Y+2,r25 + std Y+3,r16 + std Y+4,r17 + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r14,Y+7 + ldd r15,Y+8 + ldd r24,Y+13 + ldd r25,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+5,r24 + std Y+6,r25 + std Y+7,r16 + std Y+8,r17 + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + ret +623: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r0,Y+1 + eor r20,r0 + ldd r0,Y+2 + eor r21,r0 + ldd r0,Y+3 + eor r22,r0 + ldd r0,Y+4 + eor r23,r0 + ldd r0,Y+5 + eor r2,r0 + ldd r0,Y+6 + eor r3,r0 + ldd r0,Y+7 + eor r4,r0 + ldd r0,Y+8 + eor r5,r0 + ldd r0,Y+9 + eor r6,r0 + ldd r0,Y+10 + eor r7,r0 + ldd r0,Y+11 + eor r8,r0 + ldd r0,Y+12 + eor r9,r0 + ldd r0,Y+13 + eor r10,r0 + ldd r0,Y+14 + eor r11,r0 + ldd r0,Y+15 + eor r12,r0 + ldd r0,Y+16 + eor r13,r0 + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_decrypt, .-clyde128_decrypt + + .text +.global shadow512 + .type shadow512, @function +shadow512: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std 
Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+60 + ldi r16,8 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 
1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+48 + ldi r16,8 + eor r18,r16 + std Z+48,r18 + ldd r18,Z+52 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + 
std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + rjmp 1795f +1083: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 
+ movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +1127: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 
+ movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1553: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + ldd r4,Z+48 + ldd r5,Z+49 + ldd r6,Z+50 + ldd r7,Z+51 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+32,r4 + std Z+33,r5 + std Z+34,r6 + std Z+35,r7 + std Z+48,r28 + std Z+49,r29 + std Z+50,r2 + std Z+51,r3 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + ldd r4,Z+52 + ldd r5,Z+53 + ldd r6,Z+54 + ldd r7,Z+55 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + std Z+36,r4 + std Z+37,r5 + std Z+38,r6 + std Z+39,r7 + std Z+52,r28 + std Z+53,r29 + std Z+54,r2 + std Z+55,r3 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+56 + ldd r5,Z+57 + ldd r6,Z+58 + ldd r7,Z+59 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + std Z+40,r4 + std Z+41,r5 + std Z+42,r6 + std Z+43,r7 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + std Z+60,r28 + std Z+61,r29 + std Z+62,r2 + std Z+63,r3 + ret +1795: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow512, .-shadow512 + + .text +.global shadow384 + .type shadow384, @function +shadow384: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd 
r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + rjmp 1430f +814: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +858: + mov r8,r19 + mov 
r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor 
r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1284: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+32,r22 + std Z+33,r23 + std Z+34,r26 + std Z+35,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+40,r22 + std Z+41,r23 + std Z+42,r26 + std Z+43,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+44,r22 + std Z+45,r23 + std Z+46,r26 + std Z+47,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ret +1430: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size 
shadow384, .-shadow384 + +#endif diff --git a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.c b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.c index 0e19216..068938b 100644 --- a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.c +++ b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.c @@ -22,6 +22,8 @@ #include "internal-spook.h" +#if !defined(__AVR__) + /** * \brief Number of steps in the Clyde-128 block cipher. * @@ -43,9 +45,9 @@ static uint8_t const rc[CLYDE128_STEPS][8] = { }; void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]) + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -154,9 +156,9 @@ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], } void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]) + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -555,3 +557,5 @@ void shadow384(shadow384_state_t *state) le_store_word32(state->B + 44, s23); #endif } + +#endif /* !__AVR__ */ diff --git a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.h b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.h index b08ce80..77c8b86 100644 --- a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.h +++ b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.h @@ -93,31 +93,31 @@ typedef union * \brief Encrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to encrypt with. - * \param tweak Points to the tweak to encrypt with. * \param output Output buffer for the ciphertext. * \param input Input buffer for the plaintext. + * \param tweak Points to the tweak to encrypt with. * * \sa clyde128_decrypt() */ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]); + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Decrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to decrypt with. - * \param tweak Points to the tweak to decrypt with. * \param output Output buffer for the plaintext. * \param input Input buffer for the ciphertext. + * \param tweak Points to the tweak to decrypt with. * * \sa clyde128_encrypt() */ void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]); + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Performs the Shadow-512 permutation on a state. 
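The header hunks above move the tweak from the second to the trailing parameter of both Clyde-128 entry points, and the spook.c hunks that follow update every call site to match. A minimal sketch of the reordered API, assuming the usual 16-byte CLYDE128_KEY_SIZE / CLYDE128_BLOCK_SIZE / CLYDE128_TWEAK_SIZE values from the Spook reference code (the demo function name is illustrative only):

    #include <stdint.h>
    #include "internal-spook.h"

    /* Sketch only: exercises the post-patch argument order
     * (key, output, input, tweak) in both directions. */
    static void clyde128_roundtrip(void)
    {
        const unsigned char key[CLYDE128_KEY_SIZE] = {0};
        uint32_t tweak[CLYDE128_TWEAK_SIZE / 4] = {0};
        uint32_t pt[CLYDE128_BLOCK_SIZE / 4] = {1, 2, 3, 4};
        uint32_t ct[CLYDE128_BLOCK_SIZE / 4];
        uint32_t out[CLYDE128_BLOCK_SIZE / 4];

        clyde128_encrypt(key, ct, pt, tweak);   /* tweak is now the last argument */
        clyde128_decrypt(key, out, (const unsigned char *)ct, tweak);   /* decrypt takes byte input */
        /* out now holds pt again */
    }

Note that the same patch also wraps the C implementation in internal-spook.c with #if !defined(__AVR__), so on AVR targets these symbols come from the generated assembly below rather than the portable C code.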
diff --git a/spook/Implementations/crypto_aead/spook128su384v1/rhys/spook.c b/spook/Implementations/crypto_aead/spook128su384v1/rhys/spook.c index d075b33..2dbab94 100644 --- a/spook/Implementations/crypto_aead/spook128su384v1/rhys/spook.c +++ b/spook/Implementations/crypto_aead/spook128su384v1/rhys/spook.c @@ -86,7 +86,7 @@ static void spook_128_512_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 12, state->W + 4); + clyde128_encrypt(k, state->W + 12, state->W + 4, state->W); shadow512(state); } @@ -111,7 +111,7 @@ static void spook_128_384_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 8, state->W + 4); + clyde128_encrypt(k, state->W + 8, state->W + 4, state->W); shadow384(state); } @@ -310,7 +310,7 @@ int spook_128_512_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -345,7 +345,7 @@ int spook_128_512_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -377,7 +377,7 @@ int spook_128_384_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -412,7 +412,7 @@ int spook_128_384_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -444,7 +444,7 @@ int spook_128_512_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -479,7 +479,7 @@ int spook_128_512_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -511,7 +511,7 @@ int spook_128_384_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -546,7 +546,7 @@ int spook_128_384_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } diff --git 
a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook-avr.S b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook-avr.S new file mode 100644 index 0000000..05ca51e --- /dev/null +++ b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook-avr.S @@ -0,0 +1,4898 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global clyde128_encrypt + .type clyde128_encrypt, @function +clyde128_encrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + eor r10,r18 + eor r11,r19 + eor r12,r14 + eor r13,r15 + rcall 151f + ldi r27,1 + eor r20,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 635f + rjmp 725f +151: + movw r18,r20 + movw r14,r22 + and r18,r2 + and r19,r3 + and r14,r4 + and r15,r5 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + and r24,r20 + and r25,r21 + and r16,r22 + and r17,r23 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + and r10,r18 + and r11,r19 + and r12,r14 + and r13,r15 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 + movw r20,r24 + movw r22,r16 + movw r2,r18 + movw r4,r14 + mov r18,r21 + mov r19,r22 + mov r14,r23 + mov r15,r20 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr
r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r3 + mov r25,r4 + mov r16,r5 + mov r17,r2 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r22 + mov r22,r20 + mov r20,r0 + mov r0,r23 + mov r23,r21 + mov r21,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + mov r0,r4 + mov r4,r2 + mov r2,r0 + mov r0,r5 + mov r5,r3 + mov r3,r0 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r2,r24 + eor r3,r25 + eor r4,r16 + eor r5,r17 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r20,r14 + eor r21,r15 + eor r22,r18 + eor r23,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r2,r16 + eor r3,r17 + eor r4,r24 + eor r5,r25 + mov r18,r7 + mov r19,r8 + mov r14,r9 + mov r15,r6 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r11 + mov r25,r12 + mov r16,r13 + mov r17,r10 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor 
r17,r0 + mov r0,r8 + mov r8,r6 + mov r6,r0 + mov r0,r9 + mov r9,r7 + mov r7,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r6,r14 + eor r7,r15 + eor r8,r18 + eor r9,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r10,r16 + eor r11,r17 + eor r12,r24 + eor r13,r25 + ret +635: + ldd r18,Y+9 + ldd r19,Y+10 + ldd r14,Y+11 + ldd r15,Y+12 + ldd r24,Y+1 + ldd r25,Y+2 + ldd r16,Y+3 + ldd r17,Y+4 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+9,r24 + std Y+10,r25 + std Y+11,r16 + std Y+12,r17 + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r14,Y+15 + ldd r15,Y+16 + ldd r24,Y+5 + ldd r25,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+13,r24 + std Y+14,r25 + std Y+15,r16 + std Y+16,r17 + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ret +725: + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_encrypt, .-clyde128_encrypt + + .text +.global clyde128_decrypt + .type clyde128_decrypt, @function +clyde128_decrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld 
r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + rcall 103f + rjmp 623f +103: + mov r18,r23 + mov r19,r20 + mov r14,r21 + mov r15,r22 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r5 + mov r25,r2 + mov r16,r3 + mov r17,r4 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r20,r15 + eor r21,r18 + eor r22,r19 + eor r23,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r2,r17 + eor r3,r24 + eor r4,r25 + eor r5,r16 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r18,r22 + eor r19,r23 + eor r14,r20 + eor r15,r21 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r24,r4 + eor r25,r5 + eor r16,r2 + eor r17,r3 + movw r20,r14 + movw r22,r18 + movw r2,r16 + movw r4,r24 + mov 
r18,r9 + mov r19,r6 + mov r14,r7 + mov r15,r8 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r13 + mov r25,r10 + mov r16,r11 + mov r17,r12 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r6,r15 + eor r7,r18 + eor r8,r19 + eor r9,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r10,r17 + eor r11,r24 + eor r12,r25 + eor r13,r16 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r18,r8 + eor r19,r9 + eor r14,r6 + eor r15,r7 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r24,r12 + eor r25,r13 + eor r16,r10 + eor r17,r11 + movw r6,r14 + movw r8,r18 + movw r10,r16 + movw r12,r24 + movw r24,r20 + movw r16,r22 + and r24,r2 + and r25,r3 + and r16,r4 + and r17,r5 + eor r24,r6 + eor r25,r7 + eor r16,r8 + eor r17,r9 + movw r18,r2 + movw r14,r4 + and r18,r24 + and r19,r25 + and r14,r16 + and r15,r17 + eor r18,r10 + eor r19,r11 + eor r14,r12 + eor r15,r13 + movw r10,r24 + movw r12,r16 + and r24,r18 + and r25,r19 + and r16,r14 + and r17,r15 + eor r24,r20 + eor r25,r21 + eor r16,r22 + eor r17,r23 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r20,r18 + movw r22,r14 + movw r2,r24 + movw r4,r16 + ret +533: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r14,Y+3 + ldd r15,Y+4 + ldd r24,Y+9 + ldd r25,Y+10 + ldd r16,Y+11 + ldd r17,Y+12 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std 
Y+1,r24 + std Y+2,r25 + std Y+3,r16 + std Y+4,r17 + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r14,Y+7 + ldd r15,Y+8 + ldd r24,Y+13 + ldd r25,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+5,r24 + std Y+6,r25 + std Y+7,r16 + std Y+8,r17 + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + ret +623: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r0,Y+1 + eor r20,r0 + ldd r0,Y+2 + eor r21,r0 + ldd r0,Y+3 + eor r22,r0 + ldd r0,Y+4 + eor r23,r0 + ldd r0,Y+5 + eor r2,r0 + ldd r0,Y+6 + eor r3,r0 + ldd r0,Y+7 + eor r4,r0 + ldd r0,Y+8 + eor r5,r0 + ldd r0,Y+9 + eor r6,r0 + ldd r0,Y+10 + eor r7,r0 + ldd r0,Y+11 + eor r8,r0 + ldd r0,Y+12 + eor r9,r0 + ldd r0,Y+13 + eor r10,r0 + ldd r0,Y+14 + eor r11,r0 + ldd r0,Y+15 + eor r12,r0 + ldd r0,Y+16 + eor r13,r0 + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_decrypt, .-clyde128_decrypt + + .text +.global shadow512 + .type shadow512, @function +shadow512: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std 
Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+60 + ldi r16,8 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 
1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+48 + ldi r16,8 + eor r18,r16 + std Z+48,r18 + ldd r18,Z+52 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + 
std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + rjmp 1795f +1083: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 
+ movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +1127: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 
+ movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1553: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + ldd r4,Z+48 + ldd r5,Z+49 + ldd r6,Z+50 + ldd r7,Z+51 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+32,r4 + std Z+33,r5 + std Z+34,r6 + std Z+35,r7 + std Z+48,r28 + std Z+49,r29 + std Z+50,r2 + std Z+51,r3 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + ldd r4,Z+52 + ldd r5,Z+53 + ldd r6,Z+54 + ldd r7,Z+55 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + std Z+36,r4 + std Z+37,r5 + std Z+38,r6 + std Z+39,r7 + std Z+52,r28 + std Z+53,r29 + std Z+54,r2 + std Z+55,r3 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+56 + ldd r5,Z+57 + ldd r6,Z+58 + ldd r7,Z+59 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + std Z+40,r4 + std Z+41,r5 + std Z+42,r6 + std Z+43,r7 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + std Z+60,r28 + std Z+61,r29 + std Z+62,r2 + std Z+63,r3 + ret +1795: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow512, .-shadow512 + + .text +.global shadow384 + .type shadow384, @function +shadow384: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd 
r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + rjmp 1430f +814: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +858: + mov r8,r19 + mov 
r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor 
r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1284: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+32,r22 + std Z+33,r23 + std Z+34,r26 + std Z+35,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+40,r22 + std Z+41,r23 + std Z+42,r26 + std Z+43,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+44,r22 + std Z+45,r23 + std Z+46,r26 + std Z+47,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ret +1430: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size 
shadow384, .-shadow384 + +#endif diff --git a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.c b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.c index 0e19216..068938b 100644 --- a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.c +++ b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.c @@ -22,6 +22,8 @@ #include "internal-spook.h" +#if !defined(__AVR__) + /** * \brief Number of steps in the Clyde-128 block cipher. * @@ -43,9 +45,9 @@ static uint8_t const rc[CLYDE128_STEPS][8] = { }; void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]) + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -154,9 +156,9 @@ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], } void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]) + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -555,3 +557,5 @@ void shadow384(shadow384_state_t *state) le_store_word32(state->B + 44, s23); #endif } + +#endif /* !__AVR__ */ diff --git a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.h b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.h index b08ce80..77c8b86 100644 --- a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.h +++ b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.h @@ -93,31 +93,31 @@ typedef union * \brief Encrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to encrypt with. - * \param tweak Points to the tweak to encrypt with. * \param output Output buffer for the ciphertext. * \param input Input buffer for the plaintext. + * \param tweak Points to the tweak to encrypt with. * * \sa clyde128_decrypt() */ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]); + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Decrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to decrypt with. - * \param tweak Points to the tweak to decrypt with. * \param output Output buffer for the plaintext. * \param input Input buffer for the ciphertext. + * \param tweak Points to the tweak to decrypt with. * * \sa clyde128_encrypt() */ void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]); + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Performs the Shadow-512 permutation on a state. 
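The internal-spook.h hunk above moves the tweak from the second to the last parameter, giving the order (key, output, input, tweak) for both Clyde-128 directions. A minimal round-trip sketch of the reordered API, assuming only the declarations and size macros from internal-spook.h shown above; the key, tweak, and plaintext values are arbitrary placeholders, and the byte-level decrypt step assumes a little-endian target:

#include <stdint.h>
#include "internal-spook.h"

void clyde128_roundtrip_demo(void)
{
    /* Placeholder key/tweak/plaintext; real callers derive these
       from the AEAD key, the nonce, and the mode bits. */
    static const unsigned char key[CLYDE128_KEY_SIZE] = {0};
    uint32_t tweak[CLYDE128_TWEAK_SIZE / 4] = {0};
    uint32_t pt[CLYDE128_BLOCK_SIZE / 4] = {1, 2, 3, 4};
    uint32_t ct[CLYDE128_BLOCK_SIZE / 4];
    uint32_t out[CLYDE128_BLOCK_SIZE / 4];

    /* New argument order: key, output, input, tweak. */
    clyde128_encrypt(key, ct, pt, tweak);

    /* clyde128_decrypt takes its input as raw bytes; on a
       little-endian target this recovers pt into out. */
    clyde128_decrypt(key, out, (const unsigned char *)ct, tweak);
}

Note that the updated call sites in the spook.c hunks below, such as clyde128_encrypt(k, state.W, state.W, state.W + 4), pass the same pointer for output and input, so the cipher is evidently expected to tolerate fully aliased output/input buffers; the sketch above uses separate buffers only for clarity.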
diff --git a/spook/Implementations/crypto_aead/spook128su512v1/rhys/spook.c b/spook/Implementations/crypto_aead/spook128su512v1/rhys/spook.c index d075b33..2dbab94 100644 --- a/spook/Implementations/crypto_aead/spook128su512v1/rhys/spook.c +++ b/spook/Implementations/crypto_aead/spook128su512v1/rhys/spook.c @@ -86,7 +86,7 @@ static void spook_128_512_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 12, state->W + 4); + clyde128_encrypt(k, state->W + 12, state->W + 4, state->W); shadow512(state); } @@ -111,7 +111,7 @@ static void spook_128_384_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 8, state->W + 4); + clyde128_encrypt(k, state->W + 8, state->W + 4, state->W); shadow384(state); } @@ -310,7 +310,7 @@ int spook_128_512_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -345,7 +345,7 @@ int spook_128_512_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -377,7 +377,7 @@ int spook_128_384_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -412,7 +412,7 @@ int spook_128_384_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -444,7 +444,7 @@ int spook_128_512_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -479,7 +479,7 @@ int spook_128_512_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -511,7 +511,7 @@ int spook_128_384_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -546,7 +546,7 @@ int spook_128_384_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } diff --git 
a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean-avr.S b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean-avr.S new file mode 100644 index 0000000..6380870 --- /dev/null +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean-avr.S @@ -0,0 +1,1632 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global subterranean_round + .type subterranean_round, @function +subterranean_round: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r20,Z + ldd r21,Z+1 + ldd r22,Z+2 + ldd r23,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r2,Z+6 + ldd r3,Z+7 + mov r18,r20 + lsl r18 + ldd r0,Z+32 + eor r18,r0 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + ldi r25,1 + eor r20,r25 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + std Y+4,r23 + std Y+5,r26 + std Y+6,r27 + std Y+7,r2 + mov r20,r3 + ldd r21,Z+8 + ldd r22,Z+9 + ldd r23,Z+10 + ldd r26,Z+11 + ldd r27,Z+12 + ldd r2,Z+13 + ldd r3,Z+14 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+8,r20 + std Y+9,r21 + std Y+10,r22 + std Y+11,r23 + std Y+12,r26 + std Y+13,r27 + std Y+14,r2 + mov r20,r3 + ldd r21,Z+15 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r2,Z+20 + ldd r3,Z+21 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+15,r20 + std Y+16,r21 + std Y+17,r22 + std Y+18,r23 + std Y+19,r26 + std Y+20,r27 + std Y+21,r2 + mov r20,r3 + ldd r21,Z+22 + ldd r22,Z+23 + ldd r23,Z+24 + ldd r26,Z+25 + ldd r27,Z+26 + ldd r2,Z+27 + ldd r3,Z+28 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com 
r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+22,r20 + std Y+23,r21 + std Y+24,r22 + std Y+25,r23 + std Y+26,r26 + std Y+27,r27 + std Y+28,r2 + mov r20,r3 + ldd r21,Z+29 + ldd r22,Z+30 + ldd r23,Z+31 + mov r26,r18 + movw r4,r20 + movw r6,r22 + mov r8,r26 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + mov r24,r8 + lsr r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + std Y+29,r20 + std Y+30,r21 + std Y+31,r22 + std Y+32,r23 + mov r20,r18 + lsr r20 + mov r21,r20 + lsr r21 + com r20 + and r20,r21 + eor r18,r20 + andi r18,1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r2,Y+7 + ldd r3,Y+8 + movw r4,r20 + lsl r4 + rol r5 + eor r18,r4 + mov r19,r5 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + std Y+4,r23 + std Y+5,r26 + std Y+6,r27 + std Y+7,r2 + mov r20,r3 + ldd r21,Y+9 + ldd r22,Y+10 + ldd r23,Y+11 + ldd r26,Y+12 + ldd r27,Y+13 + ldd r2,Y+14 + ldd r3,Y+15 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+8,r20 + std Y+9,r21 + std Y+10,r22 + std Y+11,r23 + std Y+12,r26 + std Y+13,r27 + std Y+14,r2 + mov r20,r3 + ldd r21,Y+16 + ldd r22,Y+17 + ldd r23,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r2,Y+21 + ldd r3,Y+22 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+15,r20 + std Y+16,r21 + std Y+17,r22 + std Y+18,r23 + std Y+19,r26 + std Y+20,r27 + std Y+21,r2 + mov r20,r3 + ldd r21,Y+23 + ldd r22,Y+24 + ldd r23,Y+25 + ldd r26,Y+26 + ldd r27,Y+27 + ldd r2,Y+28 + ldd r3,Y+29 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+22,r20 + std Y+23,r21 + 
std Y+24,r22 + std Y+25,r23 + std Y+26,r26 + std Y+27,r27 + std Y+28,r2 + mov r20,r3 + ldd r21,Y+30 + ldd r22,Y+31 + ldd r23,Y+32 + mov r26,r18 + movw r4,r20 + movw r6,r22 + mov r8,r26 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + std Y+29,r20 + std Y+30,r21 + std Y+31,r22 + std Y+32,r23 + mov r20,r18 + lsr r20 + lsr r20 + lsr r20 + eor r18,r19 + eor r18,r20 + ldd r17,Y+1 + bst r17,0 + bld r20,0 + bst r17,1 + bld r14,6 + bst r17,2 + bld r27,3 + bst r17,4 + bld r6,6 + bst r17,6 + bld r12,1 + bst r17,7 + bld r22,6 + ldd r17,Y+2 + bst r17,0 + bld r25,4 + bst r17,1 + bld r4,1 + bst r17,3 + bld r9,4 + bst r17,4 + bld r20,1 + bst r17,5 + bld r14,7 + bst r17,6 + bld r27,4 + ldd r17,Y+3 + bst r17,0 + bld r6,7 + bst r17,2 + bld r12,2 + bst r17,3 + bld r22,7 + bst r17,4 + bld r25,5 + bst r17,5 + bld r4,2 + bst r17,7 + bld r9,5 + ldd r17,Y+4 + bst r17,0 + bld r20,2 + bst r17,1 + bld r15,0 + bst r17,2 + bld r27,5 + bst r17,4 + bld r7,0 + bst r17,6 + bld r12,3 + bst r17,7 + bld r23,0 + ldd r17,Y+5 + bst r17,0 + bld r25,6 + bst r17,1 + bld r4,3 + bst r17,3 + bld r9,6 + bst r17,4 + bld r20,3 + bst r17,5 + bld r15,1 + bst r17,6 + bld r27,6 + ldd r17,Y+6 + bst r17,0 + bld r7,1 + bst r17,2 + bld r12,4 + bst r17,3 + bld r23,1 + bst r17,4 + bld r25,7 + bst r17,5 + bld r4,4 + bst r17,7 + bld r9,7 + ldd r17,Y+7 + bst r17,0 + bld r20,4 + bst r17,1 + bld r15,2 + bst r17,2 + bld r27,7 + bst r17,4 + bld r7,2 + bst r17,6 + bld r12,5 + bst r17,7 + bld r23,2 + ldd r17,Y+8 + bst r17,0 + bld r16,0 + bst r17,1 + bld r4,5 + bst r17,3 + bld r10,0 + bst r17,4 + bld r20,5 + bst r17,5 + bld r15,3 + bst r17,6 + bld r2,0 + ldd r17,Y+9 + bst r17,0 + bld r7,3 + bst r17,2 + bld r12,6 + bst r17,3 + bld r23,3 + bst r17,4 + bld r16,1 + bst r17,5 + bld r4,6 + bst r17,7 + bld r10,1 + ldd r17,Y+10 + bst r17,0 + bld r20,6 + bst r17,1 + bld r15,4 + bst r17,2 + bld r2,1 + bst r17,4 + bld r7,4 + bst r17,6 + bld r12,7 + bst r17,7 + bld r23,4 + ldd r17,Y+11 + bst r17,0 + bld r16,2 + bst r17,1 + bld r4,7 + bst r17,3 + bld r10,2 + bst r17,4 + bld r20,7 + bst r17,5 + bld r15,5 + bst r17,6 + bld r2,2 + ldd r17,Y+12 + bst r17,0 + bld r7,5 + bst r17,2 + bld r13,0 + bst r17,3 + bld r23,5 + bst r17,4 + bld r16,3 + bst r17,5 + bld r5,0 + bst r17,7 + bld r10,3 + ldd r17,Y+13 + bst r17,0 + bld r21,0 + bst r17,1 + bld r15,6 + bst r17,2 + bld r2,3 + bst r17,4 + bld r7,6 + bst r17,6 + bld r13,1 + bst r17,7 + bld r23,6 + ldd r17,Y+14 + bst r17,0 + bld r16,4 + bst r17,1 + bld r5,1 + bst r17,3 + bld r10,4 + bst r17,4 + bld r21,1 + bst r17,5 + bld r15,7 + bst r17,6 + bld r2,4 + ldd r17,Y+15 + bst r17,0 + bld r7,7 + bst r17,2 + bld r13,2 + bst r17,3 + bld r23,7 + bst r17,4 + bld r16,5 + bst r17,5 + bld r5,2 + bst r17,7 + bld r10,5 + ldd r17,Y+16 + bst r17,0 + bld r21,2 + bst r17,1 + bld r24,0 + bst r17,2 + bld r2,5 + bst r17,4 + bld r8,0 + bst r17,6 + bld r13,3 + bst r17,7 + bld r26,0 + ldd r17,Y+17 + bst r17,0 + bld r16,6 + bst r17,1 + bld r5,3 + bst r17,3 + bld r10,6 + bst r17,4 + bld r21,3 + bst r17,5 + bld r24,1 + bst r17,6 + bld r2,6 + ldd r17,Y+18 + bst r17,0 + bld r8,1 + bst r17,2 + bld r13,4 + bst r17,3 + bld r26,1 + bst r17,4 + bld r16,7 + bst r17,5 + bld r5,4 + bst r17,7 + bld r10,7 + ldd r17,Y+19 + bst r17,0 + bld r21,4 + bst r17,1 + bld r24,2 + bst r17,2 + bld r2,7 + bst r17,4 + bld r8,2 + bst r17,6 + bld r13,5 + bst r17,7 + bld r26,2 + ldd r17,Y+20 + 
bst r17,1 + bld r5,5 + bst r17,3 + bld r11,0 + bst r17,4 + bld r21,5 + bst r17,5 + bld r24,3 + bst r17,6 + bld r3,0 + ldd r17,Y+21 + bst r17,0 + bld r8,3 + bst r17,2 + bld r13,6 + bst r17,3 + bld r26,3 + bst r17,5 + bld r5,6 + bst r17,7 + bld r11,1 + ldd r17,Y+22 + bst r17,0 + bld r21,6 + bst r17,1 + bld r24,4 + bst r17,2 + bld r3,1 + bst r17,4 + bld r8,4 + bst r17,6 + bld r13,7 + bst r17,7 + bld r26,4 + ldd r17,Y+23 + bst r17,1 + bld r5,7 + bst r17,3 + bld r11,2 + bst r17,4 + bld r21,7 + bst r17,5 + bld r24,5 + bst r17,6 + bld r3,2 + ldd r17,Y+24 + bst r17,0 + bld r8,5 + bst r17,2 + bld r14,0 + bst r17,3 + bld r26,5 + bst r17,5 + bld r6,0 + bst r17,7 + bld r11,3 + ldd r17,Y+25 + bst r17,0 + bld r22,0 + bst r17,1 + bld r24,6 + bst r17,2 + bld r3,3 + bst r17,4 + bld r8,6 + bst r17,6 + bld r14,1 + bst r17,7 + bld r26,6 + ldd r17,Y+26 + bst r17,1 + bld r6,1 + bst r17,3 + bld r11,4 + bst r17,4 + bld r22,1 + bst r17,5 + bld r24,7 + bst r17,6 + bld r3,4 + ldd r17,Y+27 + bst r17,0 + bld r8,7 + bst r17,2 + bld r14,2 + bst r17,3 + bld r26,7 + bst r17,5 + bld r6,2 + bst r17,7 + bld r11,5 + ldd r17,Y+28 + bst r17,0 + bld r22,2 + bst r17,1 + bld r25,0 + bst r17,2 + bld r3,5 + bst r17,4 + bld r9,0 + bst r17,6 + bld r14,3 + bst r17,7 + bld r27,0 + ldd r17,Y+29 + bst r17,1 + bld r6,3 + bst r17,3 + bld r11,6 + bst r17,4 + bld r22,3 + bst r17,5 + bld r25,1 + bst r17,6 + bld r3,6 + ldd r17,Y+30 + bst r17,0 + bld r9,1 + bst r17,2 + bld r14,4 + bst r17,3 + bld r27,1 + bst r17,5 + bld r6,4 + bst r17,7 + bld r11,7 + ldd r17,Y+31 + bst r17,0 + bld r22,4 + bst r17,1 + bld r25,2 + bst r17,2 + bld r3,7 + bst r17,4 + bld r9,2 + bst r17,6 + bld r14,5 + bst r17,7 + bld r27,2 + ldd r17,Y+32 + bst r17,1 + bld r6,5 + bst r17,3 + bld r12,0 + bst r17,4 + bld r22,5 + bst r17,5 + bld r25,3 + bst r17,6 + bld r4,0 + bst r18,0 + bld r9,3 + st Z,r20 + std Z+1,r21 + std Z+2,r22 + std Z+3,r23 + std Z+4,r26 + std Z+5,r27 + std Z+6,r2 + std Z+7,r3 + std Z+8,r4 + std Z+9,r5 + std Z+10,r6 + std Z+11,r7 + std Z+12,r8 + std Z+13,r9 + std Z+14,r10 + std Z+15,r11 + std Z+16,r12 + std Z+17,r13 + std Z+18,r14 + std Z+19,r15 + std Z+20,r24 + std Z+21,r25 + std Z+22,r16 + mov r5,r1 + ldd r17,Y+1 + bst r17,3 + bld r21,1 + bst r17,5 + bld r2,4 + ldd r17,Y+2 + bst r17,2 + bld r23,7 + bst r17,7 + bld r21,2 + ldd r17,Y+3 + bst r17,1 + bld r2,5 + bst r17,6 + bld r26,0 + ldd r17,Y+4 + bst r17,3 + bld r21,3 + bst r17,5 + bld r2,6 + ldd r17,Y+5 + bst r17,2 + bld r26,1 + bst r17,7 + bld r21,4 + ldd r17,Y+6 + bst r17,1 + bld r2,7 + bst r17,6 + bld r26,2 + ldd r17,Y+7 + bst r17,3 + bld r21,5 + bst r17,5 + bld r3,0 + ldd r17,Y+8 + bst r17,2 + bld r26,3 + bst r17,7 + bld r21,6 + ldd r17,Y+9 + bst r17,1 + bld r3,1 + bst r17,6 + bld r26,4 + ldd r17,Y+10 + bst r17,3 + bld r21,7 + bst r17,5 + bld r3,2 + ldd r17,Y+11 + bst r17,2 + bld r26,5 + bst r17,7 + bld r22,0 + ldd r17,Y+12 + bst r17,1 + bld r3,3 + bst r17,6 + bld r26,6 + ldd r17,Y+13 + bst r17,3 + bld r22,1 + bst r17,5 + bld r3,4 + ldd r17,Y+14 + bst r17,2 + bld r26,7 + bst r17,7 + bld r22,2 + ldd r17,Y+15 + bst r17,1 + bld r3,5 + bst r17,6 + bld r27,0 + ldd r17,Y+16 + bst r17,3 + bld r22,3 + bst r17,5 + bld r3,6 + ldd r17,Y+17 + bst r17,2 + bld r27,1 + bst r17,7 + bld r22,4 + ldd r17,Y+18 + bst r17,1 + bld r3,7 + bst r17,6 + bld r27,2 + ldd r17,Y+19 + bst r17,3 + bld r22,5 + bst r17,5 + bld r4,0 + ldd r17,Y+20 + bst r17,0 + bld r20,0 + bst r17,2 + bld r27,3 + bst r17,7 + bld r22,6 + ldd r17,Y+21 + bst r17,1 + bld r4,1 + bst r17,4 + bld r20,1 + bst r17,6 + bld r27,4 + ldd r17,Y+22 + bst r17,3 + bld r22,7 
+ bst r17,5 + bld r4,2 + ldd r17,Y+23 + bst r17,0 + bld r20,2 + bst r17,2 + bld r27,5 + bst r17,7 + bld r23,0 + ldd r17,Y+24 + bst r17,1 + bld r4,3 + bst r17,4 + bld r20,3 + bst r17,6 + bld r27,6 + ldd r17,Y+25 + bst r17,3 + bld r23,1 + bst r17,5 + bld r4,4 + ldd r17,Y+26 + bst r17,0 + bld r20,4 + bst r17,2 + bld r27,7 + bst r17,7 + bld r23,2 + ldd r17,Y+27 + bst r17,1 + bld r4,5 + bst r17,4 + bld r20,5 + bst r17,6 + bld r2,0 + ldd r17,Y+28 + bst r17,3 + bld r23,3 + bst r17,5 + bld r4,6 + ldd r17,Y+29 + bst r17,0 + bld r20,6 + bst r17,2 + bld r2,1 + bst r17,7 + bld r23,4 + ldd r17,Y+30 + bst r17,1 + bld r4,7 + bst r17,4 + bld r20,7 + bst r17,6 + bld r2,2 + ldd r17,Y+31 + bst r17,3 + bld r23,5 + bst r17,5 + bld r5,0 + ldd r17,Y+32 + bst r17,0 + bld r21,0 + bst r17,2 + bld r2,3 + bst r17,7 + bld r23,6 + std Z+23,r20 + std Z+24,r21 + std Z+25,r22 + std Z+26,r23 + std Z+27,r26 + std Z+28,r27 + std Z+29,r2 + std Z+30,r3 + std Z+31,r4 + std Z+32,r5 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size subterranean_round, .-subterranean_round + + .text +.global subterranean_absorb_1 + .type subterranean_absorb_1, @function +subterranean_absorb_1: + movw r30,r24 +.L__stack_usage = 2 + ldi r23,1 + mov r18,r1 + bst r22,0 + bld r18,1 + ld r0,Z + eor r0,r18 + st Z,r0 + mov r18,r1 + bst r22,3 + bld r18,3 + ldd r0,Z+4 + eor r0,r18 + std Z+4,r0 + mov r18,r1 + bst r23,0 + bld r18,0 + ldd r0,Z+8 + eor r0,r18 + std Z+8,r0 + mov r18,r1 + bst r22,5 + bld r18,6 + ldd r0,Z+16 + eor r0,r18 + std Z+16,r0 + mov r18,r1 + bst r22,2 + bld r18,0 + ldd r0,Z+17 + eor r0,r18 + std Z+17,r0 + mov r18,r1 + bst r22,1 + bld r18,0 + ldd r0,Z+22 + eor r0,r18 + std Z+22,r0 + mov r18,r1 + bst r22,6 + bld r18,5 + ldd r0,Z+24 + eor r0,r18 + std Z+24,r0 + mov r18,r1 + bst r22,7 + bld r18,2 + ldd r0,Z+29 + eor r0,r18 + std Z+29,r0 + mov r18,r1 + bst r22,4 + bld r18,1 + ldd r0,Z+31 + eor r0,r18 + std Z+31,r0 + ret + .size subterranean_absorb_1, .-subterranean_absorb_1 + + .text +.global subterranean_absorb_word + .type subterranean_absorb_word, @function +subterranean_absorb_word: + movw r30,r24 +.L__stack_usage = 2 + mov r18,r1 + bst r20,0 + bld r18,1 + bst r21,4 + bld r18,2 + bst r23,0 + bld r18,4 + ld r0,Z + eor r0,r18 + st Z,r0 + mov r18,r1 + bst r22,1 + bld r18,3 + bst r21,6 + bld r18,7 + ldd r0,Z+1 + eor r0,r18 + std Z+1,r0 + mov r18,r1 + bst r23,6 + bld r18,1 + bst r23,5 + bld r18,6 + ldd r0,Z+2 + eor r0,r18 + std Z+2,r0 + mov r18,r1 + bst r23,2 + bld r18,6 + ldd r0,Z+3 + eor r0,r18 + std Z+3,r0 + mov r18,r1 + bst r20,3 + bld r18,3 + ldd r0,Z+4 + eor r0,r18 + std Z+4,r0 + mov r18,r1 + bst r21,0 + bld r18,0 + bst r21,7 + bld r18,6 + ldd r0,Z+8 + eor r0,r18 + std Z+8,r0 + mov r18,r1 + bst r21,5 + bld r18,7 + ldd r0,Z+11 + eor r0,r18 + std Z+11,r0 + mov r18,r1 + bst r22,7 + bld r18,7 + ldd r0,Z+13 + eor r0,r18 + std Z+13,r0 + mov r18,r1 + bst r22,4 + bld r18,0 + bst r20,5 + bld r18,6 + ldd r0,Z+16 + eor r0,r18 + std Z+16,r0 + mov r18,r1 + bst r20,2 + bld r18,0 + bst r22,2 + bld r18,1 + bst r23,3 + bld r18,4 + ldd r0,Z+17 + eor r0,r18 + std Z+17,r0 + mov r18,r1 + bst r23,7 + bld r18,5 + ldd r0,Z+20 + eor r0,r18 + std Z+20,r0 + mov r18,r1 + bst r22,5 + bld r18,1 + ldd r0,Z+21 + eor r0,r18 + std Z+21,r0 + mov r18,r1 + bst r20,1 + bld r18,0 + ldd r0,Z+22 + eor r0,r18 + std Z+22,r0 + mov r18,r1 + bst r21,3 + bld r18,0 + bst r22,6 
+ bld r18,5 + bst r23,1 + bld r18,6 + ldd r0,Z+23 + eor r0,r18 + std Z+23,r0 + mov r18,r1 + bst r20,6 + bld r18,5 + ldd r0,Z+24 + eor r0,r18 + std Z+24,r0 + mov r18,r1 + bst r22,3 + bld r18,3 + bst r21,1 + bld r18,5 + ldd r0,Z+26 + eor r0,r18 + std Z+26,r0 + mov r18,r1 + bst r21,2 + bld r18,7 + ldd r0,Z+27 + eor r0,r18 + std Z+27,r0 + mov r18,r1 + bst r23,4 + bld r18,1 + ldd r0,Z+28 + eor r0,r18 + std Z+28,r0 + mov r18,r1 + bst r20,7 + bld r18,2 + ldd r0,Z+29 + eor r0,r18 + std Z+29,r0 + mov r18,r1 + bst r22,0 + bld r18,1 + ldd r0,Z+30 + eor r0,r18 + std Z+30,r0 + mov r18,r1 + bst r20,4 + bld r18,1 + ldd r0,Z+31 + eor r0,r18 + std Z+31,r0 + ret + .size subterranean_absorb_word, .-subterranean_absorb_word + + .text +.global subterranean_extract + .type subterranean_extract, @function +subterranean_extract: + movw r30,r24 +.L__stack_usage = 2 + ld r26,Z + bst r26,1 + bld r22,0 + bst r26,2 + bld r23,4 + bst r26,4 + bld r25,0 + ldd r26,Z+1 + bst r26,0 + bld r18,4 + bst r26,3 + bld r24,1 + bst r26,7 + bld r23,6 + ldd r26,Z+2 + bst r26,0 + bld r20,0 + bst r26,1 + bld r25,6 + bst r26,6 + bld r25,5 + bst r26,7 + bld r18,7 + ldd r26,Z+3 + bst r26,6 + bld r25,2 + ldd r26,Z+4 + bst r26,0 + bld r21,4 + bst r26,2 + bld r19,2 + bst r26,3 + bld r22,3 + ldd r26,Z+5 + bst r26,4 + bld r19,1 + bst r26,6 + bld r20,3 + ldd r26,Z+7 + bst r26,4 + bld r18,6 + ldd r26,Z+8 + bst r26,0 + bld r23,0 + bst r26,3 + bld r21,1 + bst r26,4 + bld r20,6 + bst r26,6 + bld r23,7 + ldd r26,Z+9 + bst r26,1 + bld r19,3 + ldd r26,Z+10 + bst r26,1 + bld r18,1 + ldd r26,Z+11 + bst r26,0 + bld r20,5 + bst r26,4 + bld r21,7 + bst r26,7 + bld r23,5 + ldd r26,Z+13 + bst r26,7 + bld r24,7 + ldd r26,Z+14 + bst r26,5 + bld r21,3 + ldd r26,Z+15 + bst r26,0 + bld r20,2 + bst r26,1 + bld r18,2 + bst r26,3 + bld r18,5 + ldd r26,Z+16 + bst r26,0 + bld r24,4 + bst r26,1 + bld r20,4 + bst r26,6 + bld r22,5 + ldd r26,Z+17 + bst r26,0 + bld r22,2 + bst r26,1 + bld r24,2 + bst r26,4 + bld r25,3 + ldd r26,Z+18 + bst r26,2 + bld r20,7 + ldd r26,Z+20 + bst r26,2 + bld r19,5 + bst r26,5 + bld r25,7 + ldd r26,Z+21 + bst r26,1 + bld r24,5 + ldd r26,Z+22 + bst r26,0 + bld r22,1 + ldd r26,Z+23 + bst r26,0 + bld r23,3 + bst r26,3 + bld r19,7 + bst r26,5 + bld r24,6 + bst r26,6 + bld r25,1 + ldd r26,Z+24 + bst r26,1 + bld r19,0 + bst r26,5 + bld r22,6 + ldd r26,Z+26 + bst r26,3 + bld r24,3 + bst r26,5 + bld r23,1 + ldd r26,Z+27 + bst r26,6 + bld r18,3 + bst r26,7 + bld r23,2 + ldd r26,Z+28 + bst r26,1 + bld r25,4 + bst r26,3 + bld r21,2 + ldd r26,Z+29 + bst r26,2 + bld r22,7 + bst r26,3 + bld r21,5 + ldd r26,Z+30 + bst r26,0 + bld r21,6 + bst r26,1 + bld r24,0 + bst r26,2 + bld r19,6 + bst r26,6 + bld r20,1 + ldd r26,Z+31 + bst r26,1 + bld r22,4 + bst r26,5 + bld r21,0 + bst r26,7 + bld r19,4 + ldd r26,Z+32 + bst r26,0 + bld r18,0 + eor r22,r18 + eor r23,r19 + eor r24,r20 + eor r25,r21 + ret + .size subterranean_extract, .-subterranean_extract + +#endif diff --git a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.c b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.c index 1cb64e2..71b1c4c 100644 --- a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.c +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.c @@ -23,6 +23,8 @@ #include "internal-subterranean.h" #include <string.h> +#if !defined(__AVR__) + void subterranean_round(subterranean_state_t *state) { uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8; @@ -168,28 +170,10 @@ void
subterranean_round(subterranean_state_t *state) state->x[8] = BDN(x7, 21, 0); } -void subterranean_blank(subterranean_state_t *state) -{ - unsigned round; - for (round = 0; round < 8; ++round) { - subterranean_round(state); - state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ - } -} - -void subterranean_duplex_0(subterranean_state_t *state) -{ - subterranean_round(state); - state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ -} - -void subterranean_duplex_1(subterranean_state_t *state, unsigned char data) +void subterranean_absorb_1(subterranean_state_t *state, unsigned char data) { uint32_t x = data; - /* Perform a single Subterranean round before absorbing the bits */ - subterranean_round(state); - /* Rearrange the bits and absorb them into the state */ state->x[0] ^= (x << 1) & 0x00000002U; state->x[1] ^= x & 0x00000008U; @@ -200,13 +184,10 @@ void subterranean_duplex_1(subterranean_state_t *state, unsigned char data) state->x[7] ^= ((x << 21) & 0x02000000U) ^ ((x << 3) & 0x00000400U); } -void subterranean_duplex_word(subterranean_state_t *state, uint32_t x) +void subterranean_absorb_word(subterranean_state_t *state, uint32_t x) { uint32_t y; - /* Perform a single Subterranean round before absorbing the bits */ - subterranean_round(state); - /* To absorb the word into the state, we first rearrange the source * bits to be in the right target bit positions. Then we mask and * XOR them into the relevant words of the state. @@ -258,39 +239,6 @@ void subterranean_duplex_word(subterranean_state_t *state, uint32_t x) state->x[7] ^= (y & 0x02000400U) ^ (x & 0x00020002U); } -void subterranean_duplex_n - (subterranean_state_t *state, const unsigned char *data, unsigned len) -{ - switch (len) { - case 0: - subterranean_duplex_0(state); - break; - case 1: - subterranean_duplex_1(state, data[0]); - break; - case 2: - /* Load 16 bits and add the padding bit to the 17th bit */ - subterranean_duplex_word - (state, ((uint32_t)(data[0]) | - (((uint32_t)(data[1])) << 8) | - 0x10000U)); - break; - case 3: - /* Load 24 bits and add the padding bit to the 25th bit */ - subterranean_duplex_word - (state, ((uint32_t)(data[0]) | - (((uint32_t)(data[1])) << 8) | - (((uint32_t)(data[2])) << 16) | - 0x01000000U)); - break; - default: - /* Load 32 bits and add the padding bit to the 33rd bit */ - subterranean_duplex_word(state, le_load_word32(data)); - state->x[8] ^= 0x00000001U; - break; - } -} - uint32_t subterranean_extract(subterranean_state_t *state) { uint32_t x, y; @@ -399,12 +347,57 @@ uint32_t subterranean_extract(subterranean_state_t *state) return y ^ state->x[8]; } +#endif /* !__AVR__ */ + +void subterranean_blank(subterranean_state_t *state) +{ + unsigned round; + for (round = 0; round < 8; ++round) { + subterranean_round(state); + state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ + } +} + +void subterranean_duplex_n + (subterranean_state_t *state, const unsigned char *data, unsigned len) +{ + subterranean_round(state); + switch (len) { + case 0: + state->x[0] ^= 0x02; /* padding for an empty block */ + break; + case 1: + subterranean_absorb_1(state, data[0]); + break; + case 2: + /* Load 16 bits and add the padding bit to the 17th bit */ + subterranean_absorb_word + (state, ((uint32_t)(data[0]) | + (((uint32_t)(data[1])) << 8) | + 0x10000U)); + break; + case 3: + /* Load 24 bits and add the padding bit to the 25th bit */ + subterranean_absorb_word + (state, ((uint32_t)(data[0]) | + (((uint32_t)(data[1])) << 8) | + (((uint32_t)(data[2])) << 
16) | + 0x01000000U)); + break; + default: + /* Load 32 bits and add the padding bit to the 33rd bit */ + subterranean_absorb_word(state, le_load_word32(data)); + state->x[8] ^= 0x00000001U; + break; + } +} + void subterranean_absorb (subterranean_state_t *state, const unsigned char *data, unsigned long long len) { while (len >= 4) { - subterranean_duplex_4(state, data); + subterranean_duplex_4(state, le_load_word32(data)); data += 4; len -= 4; } diff --git a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.h b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.h index 71cebb2..8ebbd30 100644 --- a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.h +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.h @@ -28,6 +28,8 @@ /** * \file internal-subterranean.h * \brief Internal implementation of the Subterranean block operation. + * + * References: https://cs.ru.nl/~joan/subterranean.html */ #ifdef __cplusplus @@ -66,7 +68,19 @@ void subterranean_blank(subterranean_state_t *state); * * \param state Subterranean state to be transformed. */ -void subterranean_duplex_0(subterranean_state_t *state); +#define subterranean_duplex_0(state) \ + do { \ + subterranean_round((state)); \ + (state)->x[0] ^= 2; /* padding for an empty block */ \ + } while (0) + +/** + * \brief Absorbs a single byte into the Subterranean state. + * + * \param state Subterranean state to be transformed. + * \param data The single byte to be absorbed. + */ +void subterranean_absorb_1(subterranean_state_t *state, unsigned char data); /** * \brief Performs a single Subterranean round and absorbs one byte. @@ -74,7 +88,11 @@ void subterranean_duplex_0(subterranean_state_t *state); * \param state Subterranean state to be transformed. * \param data The single byte to be absorbed. */ -void subterranean_duplex_1(subterranean_state_t *state, unsigned char data); +#define subterranean_duplex_1(state, data) \ + do { \ + subterranean_round((state)); \ + subterranean_absorb_1((state), (data)); \ + } while (0) /** * \brief Absorbs a 32-bit word into the Subterranean state. @@ -82,17 +100,30 @@ void subterranean_duplex_1(subterranean_state_t *state, unsigned char data); * \param state Subterranean state to be transformed. * \param x The word to absorb into the state. */ -void subterranean_duplex_word(subterranean_state_t *state, uint32_t x); +void subterranean_absorb_word(subterranean_state_t *state, uint32_t x); + +/** + * \brief Absorbs a 32-bit word into the Subterranean state after performing + * the round function. + * + * \param state Subterranean state to be transformed. + * \param x The word to absorb into the state. + */ +#define subterranean_duplex_word(state, x) \ + do { \ + subterranean_round((state)); \ + subterranean_absorb_word((state), (x)); \ + } while (0) /** * \brief Performs a single Subterranean round and absorbs four bytes. * * \param state Subterranean state to be transformed. - * \param data Points to the four data bytes to be absorbed. + * \param data 32-bit word containing the four data bytes to be absorbed. 
*/ #define subterranean_duplex_4(state, data) \ do { \ - subterranean_duplex_word((state), le_load_word32((data))); \ + subterranean_duplex_word((state), (data)); \ (state)->x[8] ^= 1; \ } while (0) diff --git a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.c b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.c index 1bc9fc4..aad147a 100644 --- a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.c +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.c @@ -75,8 +75,7 @@ int subterranean_aead_encrypt while (mlen >= 4) { x1 = le_load_word32(m); x2 = subterranean_extract(&state) ^ x1; - subterranean_duplex_word(&state, x1); - state.x[8] ^= 1; /* padding for 32-bit blocks */ + subterranean_duplex_4(&state, x1); le_store_word32(c, x2); c += 4; m += 4; @@ -146,8 +145,7 @@ int subterranean_aead_decrypt while (clen >= 4) { x = le_load_word32(c); x ^= subterranean_extract(&state); - subterranean_duplex_word(&state, x); - state.x[8] ^= 1; /* padding for 32-bit blocks */ + subterranean_duplex_4(&state, x); le_store_word32(m, x); c += 4; m += 4; diff --git a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.h b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.h index 148e5e8..3b35b42 100644 --- a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.h +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.h @@ -38,6 +38,8 @@ * * The Subterranean permutation is intended for hardware implementation. * It is not structured for efficient software implementation. + * + * References: https://cs.ru.nl/~joan/subterranean.html */ #ifdef __cplusplus diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean-avr.S b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean-avr.S new file mode 100644 index 0000000..6380870 --- /dev/null +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean-avr.S @@ -0,0 +1,1632 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global subterranean_round + .type subterranean_round, @function +subterranean_round: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r20,Z + ldd r21,Z+1 + ldd r22,Z+2 + ldd r23,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r2,Z+6 + ldd r3,Z+7 + mov r18,r20 + lsl r18 + ldd r0,Z+32 + eor r18,r0 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + ldi r25,1 + eor r20,r25 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + std Y+4,r23 + std Y+5,r26 + std Y+6,r27 + std Y+7,r2 + mov r20,r3 + ldd r21,Z+8 + ldd r22,Z+9 + ldd r23,Z+10 + ldd r26,Z+11 + ldd r27,Z+12 + ldd r2,Z+13 + ldd r3,Z+14 + movw r4,r20 + movw
r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+8,r20 + std Y+9,r21 + std Y+10,r22 + std Y+11,r23 + std Y+12,r26 + std Y+13,r27 + std Y+14,r2 + mov r20,r3 + ldd r21,Z+15 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r2,Z+20 + ldd r3,Z+21 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+15,r20 + std Y+16,r21 + std Y+17,r22 + std Y+18,r23 + std Y+19,r26 + std Y+20,r27 + std Y+21,r2 + mov r20,r3 + ldd r21,Z+22 + ldd r22,Z+23 + ldd r23,Z+24 + ldd r26,Z+25 + ldd r27,Z+26 + ldd r2,Z+27 + ldd r3,Z+28 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+22,r20 + std Y+23,r21 + std Y+24,r22 + std Y+25,r23 + std Y+26,r26 + std Y+27,r27 + std Y+28,r2 + mov r20,r3 + ldd r21,Z+29 + ldd r22,Z+30 + ldd r23,Z+31 + mov r26,r18 + movw r4,r20 + movw r6,r22 + mov r8,r26 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + mov r24,r8 + lsr r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + std Y+29,r20 + std Y+30,r21 + std Y+31,r22 + std Y+32,r23 + mov r20,r18 + lsr r20 + mov r21,r20 + lsr r21 + com r20 + and r20,r21 + eor r18,r20 + andi r18,1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r2,Y+7 + ldd r3,Y+8 + movw r4,r20 + lsl r4 + rol r5 + eor r18,r4 + mov r19,r5 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + std Y+4,r23 + std Y+5,r26 + std Y+6,r27 + std Y+7,r2 + mov r20,r3 + ldd r21,Y+9 + ldd r22,Y+10 + ldd r23,Y+11 + ldd r26,Y+12 + ldd r27,Y+13 + ldd r2,Y+14 + ldd r3,Y+15 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror 
r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+8,r20 + std Y+9,r21 + std Y+10,r22 + std Y+11,r23 + std Y+12,r26 + std Y+13,r27 + std Y+14,r2 + mov r20,r3 + ldd r21,Y+16 + ldd r22,Y+17 + ldd r23,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r2,Y+21 + ldd r3,Y+22 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+15,r20 + std Y+16,r21 + std Y+17,r22 + std Y+18,r23 + std Y+19,r26 + std Y+20,r27 + std Y+21,r2 + mov r20,r3 + ldd r21,Y+23 + ldd r22,Y+24 + ldd r23,Y+25 + ldd r26,Y+26 + ldd r27,Y+27 + ldd r2,Y+28 + ldd r3,Y+29 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+22,r20 + std Y+23,r21 + std Y+24,r22 + std Y+25,r23 + std Y+26,r26 + std Y+27,r27 + std Y+28,r2 + mov r20,r3 + ldd r21,Y+30 + ldd r22,Y+31 + ldd r23,Y+32 + mov r26,r18 + movw r4,r20 + movw r6,r22 + mov r8,r26 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + std Y+29,r20 + std Y+30,r21 + std Y+31,r22 + std Y+32,r23 + mov r20,r18 + lsr r20 + lsr r20 + lsr r20 + eor r18,r19 + eor r18,r20 + ldd r17,Y+1 + bst r17,0 + bld r20,0 + bst r17,1 + bld r14,6 + bst r17,2 + bld r27,3 + bst r17,4 + bld r6,6 + bst r17,6 + bld r12,1 + bst r17,7 + bld r22,6 + ldd r17,Y+2 + bst r17,0 + bld r25,4 + bst r17,1 + bld r4,1 + bst r17,3 + bld r9,4 + bst r17,4 + bld r20,1 + bst r17,5 + bld r14,7 + bst r17,6 + bld r27,4 + ldd r17,Y+3 + bst r17,0 + bld r6,7 + bst r17,2 + bld r12,2 + bst r17,3 + bld r22,7 + bst r17,4 + bld r25,5 + bst r17,5 + bld r4,2 + bst r17,7 + bld r9,5 + ldd r17,Y+4 + bst r17,0 + bld r20,2 + bst r17,1 + bld r15,0 + bst r17,2 + bld r27,5 + bst r17,4 + bld r7,0 + bst r17,6 + bld r12,3 + bst r17,7 + bld r23,0 + ldd r17,Y+5 + bst r17,0 + bld r25,6 + bst r17,1 + bld r4,3 + bst r17,3 + bld r9,6 + bst r17,4 + bld r20,3 + bst r17,5 + bld r15,1 + bst r17,6 + bld r27,6 + ldd r17,Y+6 + bst r17,0 + bld r7,1 + bst r17,2 + bld r12,4 + bst r17,3 + bld r23,1 + bst r17,4 + bld r25,7 + bst r17,5 + bld r4,4 + bst r17,7 + bld r9,7 + ldd r17,Y+7 + bst r17,0 + bld r20,4 + bst r17,1 + bld r15,2 + bst r17,2 + bld r27,7 + bst r17,4 + bld r7,2 + bst r17,6 + bld r12,5 + bst r17,7 + bld r23,2 + ldd r17,Y+8 + bst r17,0 + bld r16,0 + bst r17,1 + bld r4,5 + bst r17,3 + bld r10,0 + bst r17,4 + bld r20,5 + bst r17,5 + bld r15,3 + bst 
r17,6 + bld r2,0 + ldd r17,Y+9 + bst r17,0 + bld r7,3 + bst r17,2 + bld r12,6 + bst r17,3 + bld r23,3 + bst r17,4 + bld r16,1 + bst r17,5 + bld r4,6 + bst r17,7 + bld r10,1 + ldd r17,Y+10 + bst r17,0 + bld r20,6 + bst r17,1 + bld r15,4 + bst r17,2 + bld r2,1 + bst r17,4 + bld r7,4 + bst r17,6 + bld r12,7 + bst r17,7 + bld r23,4 + ldd r17,Y+11 + bst r17,0 + bld r16,2 + bst r17,1 + bld r4,7 + bst r17,3 + bld r10,2 + bst r17,4 + bld r20,7 + bst r17,5 + bld r15,5 + bst r17,6 + bld r2,2 + ldd r17,Y+12 + bst r17,0 + bld r7,5 + bst r17,2 + bld r13,0 + bst r17,3 + bld r23,5 + bst r17,4 + bld r16,3 + bst r17,5 + bld r5,0 + bst r17,7 + bld r10,3 + ldd r17,Y+13 + bst r17,0 + bld r21,0 + bst r17,1 + bld r15,6 + bst r17,2 + bld r2,3 + bst r17,4 + bld r7,6 + bst r17,6 + bld r13,1 + bst r17,7 + bld r23,6 + ldd r17,Y+14 + bst r17,0 + bld r16,4 + bst r17,1 + bld r5,1 + bst r17,3 + bld r10,4 + bst r17,4 + bld r21,1 + bst r17,5 + bld r15,7 + bst r17,6 + bld r2,4 + ldd r17,Y+15 + bst r17,0 + bld r7,7 + bst r17,2 + bld r13,2 + bst r17,3 + bld r23,7 + bst r17,4 + bld r16,5 + bst r17,5 + bld r5,2 + bst r17,7 + bld r10,5 + ldd r17,Y+16 + bst r17,0 + bld r21,2 + bst r17,1 + bld r24,0 + bst r17,2 + bld r2,5 + bst r17,4 + bld r8,0 + bst r17,6 + bld r13,3 + bst r17,7 + bld r26,0 + ldd r17,Y+17 + bst r17,0 + bld r16,6 + bst r17,1 + bld r5,3 + bst r17,3 + bld r10,6 + bst r17,4 + bld r21,3 + bst r17,5 + bld r24,1 + bst r17,6 + bld r2,6 + ldd r17,Y+18 + bst r17,0 + bld r8,1 + bst r17,2 + bld r13,4 + bst r17,3 + bld r26,1 + bst r17,4 + bld r16,7 + bst r17,5 + bld r5,4 + bst r17,7 + bld r10,7 + ldd r17,Y+19 + bst r17,0 + bld r21,4 + bst r17,1 + bld r24,2 + bst r17,2 + bld r2,7 + bst r17,4 + bld r8,2 + bst r17,6 + bld r13,5 + bst r17,7 + bld r26,2 + ldd r17,Y+20 + bst r17,1 + bld r5,5 + bst r17,3 + bld r11,0 + bst r17,4 + bld r21,5 + bst r17,5 + bld r24,3 + bst r17,6 + bld r3,0 + ldd r17,Y+21 + bst r17,0 + bld r8,3 + bst r17,2 + bld r13,6 + bst r17,3 + bld r26,3 + bst r17,5 + bld r5,6 + bst r17,7 + bld r11,1 + ldd r17,Y+22 + bst r17,0 + bld r21,6 + bst r17,1 + bld r24,4 + bst r17,2 + bld r3,1 + bst r17,4 + bld r8,4 + bst r17,6 + bld r13,7 + bst r17,7 + bld r26,4 + ldd r17,Y+23 + bst r17,1 + bld r5,7 + bst r17,3 + bld r11,2 + bst r17,4 + bld r21,7 + bst r17,5 + bld r24,5 + bst r17,6 + bld r3,2 + ldd r17,Y+24 + bst r17,0 + bld r8,5 + bst r17,2 + bld r14,0 + bst r17,3 + bld r26,5 + bst r17,5 + bld r6,0 + bst r17,7 + bld r11,3 + ldd r17,Y+25 + bst r17,0 + bld r22,0 + bst r17,1 + bld r24,6 + bst r17,2 + bld r3,3 + bst r17,4 + bld r8,6 + bst r17,6 + bld r14,1 + bst r17,7 + bld r26,6 + ldd r17,Y+26 + bst r17,1 + bld r6,1 + bst r17,3 + bld r11,4 + bst r17,4 + bld r22,1 + bst r17,5 + bld r24,7 + bst r17,6 + bld r3,4 + ldd r17,Y+27 + bst r17,0 + bld r8,7 + bst r17,2 + bld r14,2 + bst r17,3 + bld r26,7 + bst r17,5 + bld r6,2 + bst r17,7 + bld r11,5 + ldd r17,Y+28 + bst r17,0 + bld r22,2 + bst r17,1 + bld r25,0 + bst r17,2 + bld r3,5 + bst r17,4 + bld r9,0 + bst r17,6 + bld r14,3 + bst r17,7 + bld r27,0 + ldd r17,Y+29 + bst r17,1 + bld r6,3 + bst r17,3 + bld r11,6 + bst r17,4 + bld r22,3 + bst r17,5 + bld r25,1 + bst r17,6 + bld r3,6 + ldd r17,Y+30 + bst r17,0 + bld r9,1 + bst r17,2 + bld r14,4 + bst r17,3 + bld r27,1 + bst r17,5 + bld r6,4 + bst r17,7 + bld r11,7 + ldd r17,Y+31 + bst r17,0 + bld r22,4 + bst r17,1 + bld r25,2 + bst r17,2 + bld r3,7 + bst r17,4 + bld r9,2 + bst r17,6 + bld r14,5 + bst r17,7 + bld r27,2 + ldd r17,Y+32 + bst r17,1 + bld r6,5 + bst r17,3 + bld r12,0 + bst r17,4 + bld r22,5 + bst r17,5 + bld r25,3 + bst 
r17,6 + bld r4,0 + bst r18,0 + bld r9,3 + st Z,r20 + std Z+1,r21 + std Z+2,r22 + std Z+3,r23 + std Z+4,r26 + std Z+5,r27 + std Z+6,r2 + std Z+7,r3 + std Z+8,r4 + std Z+9,r5 + std Z+10,r6 + std Z+11,r7 + std Z+12,r8 + std Z+13,r9 + std Z+14,r10 + std Z+15,r11 + std Z+16,r12 + std Z+17,r13 + std Z+18,r14 + std Z+19,r15 + std Z+20,r24 + std Z+21,r25 + std Z+22,r16 + mov r5,r1 + ldd r17,Y+1 + bst r17,3 + bld r21,1 + bst r17,5 + bld r2,4 + ldd r17,Y+2 + bst r17,2 + bld r23,7 + bst r17,7 + bld r21,2 + ldd r17,Y+3 + bst r17,1 + bld r2,5 + bst r17,6 + bld r26,0 + ldd r17,Y+4 + bst r17,3 + bld r21,3 + bst r17,5 + bld r2,6 + ldd r17,Y+5 + bst r17,2 + bld r26,1 + bst r17,7 + bld r21,4 + ldd r17,Y+6 + bst r17,1 + bld r2,7 + bst r17,6 + bld r26,2 + ldd r17,Y+7 + bst r17,3 + bld r21,5 + bst r17,5 + bld r3,0 + ldd r17,Y+8 + bst r17,2 + bld r26,3 + bst r17,7 + bld r21,6 + ldd r17,Y+9 + bst r17,1 + bld r3,1 + bst r17,6 + bld r26,4 + ldd r17,Y+10 + bst r17,3 + bld r21,7 + bst r17,5 + bld r3,2 + ldd r17,Y+11 + bst r17,2 + bld r26,5 + bst r17,7 + bld r22,0 + ldd r17,Y+12 + bst r17,1 + bld r3,3 + bst r17,6 + bld r26,6 + ldd r17,Y+13 + bst r17,3 + bld r22,1 + bst r17,5 + bld r3,4 + ldd r17,Y+14 + bst r17,2 + bld r26,7 + bst r17,7 + bld r22,2 + ldd r17,Y+15 + bst r17,1 + bld r3,5 + bst r17,6 + bld r27,0 + ldd r17,Y+16 + bst r17,3 + bld r22,3 + bst r17,5 + bld r3,6 + ldd r17,Y+17 + bst r17,2 + bld r27,1 + bst r17,7 + bld r22,4 + ldd r17,Y+18 + bst r17,1 + bld r3,7 + bst r17,6 + bld r27,2 + ldd r17,Y+19 + bst r17,3 + bld r22,5 + bst r17,5 + bld r4,0 + ldd r17,Y+20 + bst r17,0 + bld r20,0 + bst r17,2 + bld r27,3 + bst r17,7 + bld r22,6 + ldd r17,Y+21 + bst r17,1 + bld r4,1 + bst r17,4 + bld r20,1 + bst r17,6 + bld r27,4 + ldd r17,Y+22 + bst r17,3 + bld r22,7 + bst r17,5 + bld r4,2 + ldd r17,Y+23 + bst r17,0 + bld r20,2 + bst r17,2 + bld r27,5 + bst r17,7 + bld r23,0 + ldd r17,Y+24 + bst r17,1 + bld r4,3 + bst r17,4 + bld r20,3 + bst r17,6 + bld r27,6 + ldd r17,Y+25 + bst r17,3 + bld r23,1 + bst r17,5 + bld r4,4 + ldd r17,Y+26 + bst r17,0 + bld r20,4 + bst r17,2 + bld r27,7 + bst r17,7 + bld r23,2 + ldd r17,Y+27 + bst r17,1 + bld r4,5 + bst r17,4 + bld r20,5 + bst r17,6 + bld r2,0 + ldd r17,Y+28 + bst r17,3 + bld r23,3 + bst r17,5 + bld r4,6 + ldd r17,Y+29 + bst r17,0 + bld r20,6 + bst r17,2 + bld r2,1 + bst r17,7 + bld r23,4 + ldd r17,Y+30 + bst r17,1 + bld r4,7 + bst r17,4 + bld r20,7 + bst r17,6 + bld r2,2 + ldd r17,Y+31 + bst r17,3 + bld r23,5 + bst r17,5 + bld r5,0 + ldd r17,Y+32 + bst r17,0 + bld r21,0 + bst r17,2 + bld r2,3 + bst r17,7 + bld r23,6 + std Z+23,r20 + std Z+24,r21 + std Z+25,r22 + std Z+26,r23 + std Z+27,r26 + std Z+28,r27 + std Z+29,r2 + std Z+30,r3 + std Z+31,r4 + std Z+32,r5 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size subterranean_round, .-subterranean_round + + .text +.global subterranean_absorb_1 + .type subterranean_absorb_1, @function +subterranean_absorb_1: + movw r30,r24 +.L__stack_usage = 2 + ldi r23,1 + mov r18,r1 + bst r22,0 + bld r18,1 + ld r0,Z + eor r0,r18 + st Z,r0 + mov r18,r1 + bst r22,3 + bld r18,3 + ldd r0,Z+4 + eor r0,r18 + std Z+4,r0 + mov r18,r1 + bst r23,0 + bld r18,0 + ldd r0,Z+8 + eor r0,r18 + std Z+8,r0 + mov r18,r1 + bst r22,5 + bld r18,6 + ldd r0,Z+16 + eor r0,r18 + std Z+16,r0 + mov r18,r1 + bst r22,2 + bld r18,0 + ldd r0,Z+17 + eor r0,r18 + std 
Z+17,r0 + mov r18,r1 + bst r22,1 + bld r18,0 + ldd r0,Z+22 + eor r0,r18 + std Z+22,r0 + mov r18,r1 + bst r22,6 + bld r18,5 + ldd r0,Z+24 + eor r0,r18 + std Z+24,r0 + mov r18,r1 + bst r22,7 + bld r18,2 + ldd r0,Z+29 + eor r0,r18 + std Z+29,r0 + mov r18,r1 + bst r22,4 + bld r18,1 + ldd r0,Z+31 + eor r0,r18 + std Z+31,r0 + ret + .size subterranean_absorb_1, .-subterranean_absorb_1 + + .text +.global subterranean_absorb_word + .type subterranean_absorb_word, @function +subterranean_absorb_word: + movw r30,r24 +.L__stack_usage = 2 + mov r18,r1 + bst r20,0 + bld r18,1 + bst r21,4 + bld r18,2 + bst r23,0 + bld r18,4 + ld r0,Z + eor r0,r18 + st Z,r0 + mov r18,r1 + bst r22,1 + bld r18,3 + bst r21,6 + bld r18,7 + ldd r0,Z+1 + eor r0,r18 + std Z+1,r0 + mov r18,r1 + bst r23,6 + bld r18,1 + bst r23,5 + bld r18,6 + ldd r0,Z+2 + eor r0,r18 + std Z+2,r0 + mov r18,r1 + bst r23,2 + bld r18,6 + ldd r0,Z+3 + eor r0,r18 + std Z+3,r0 + mov r18,r1 + bst r20,3 + bld r18,3 + ldd r0,Z+4 + eor r0,r18 + std Z+4,r0 + mov r18,r1 + bst r21,0 + bld r18,0 + bst r21,7 + bld r18,6 + ldd r0,Z+8 + eor r0,r18 + std Z+8,r0 + mov r18,r1 + bst r21,5 + bld r18,7 + ldd r0,Z+11 + eor r0,r18 + std Z+11,r0 + mov r18,r1 + bst r22,7 + bld r18,7 + ldd r0,Z+13 + eor r0,r18 + std Z+13,r0 + mov r18,r1 + bst r22,4 + bld r18,0 + bst r20,5 + bld r18,6 + ldd r0,Z+16 + eor r0,r18 + std Z+16,r0 + mov r18,r1 + bst r20,2 + bld r18,0 + bst r22,2 + bld r18,1 + bst r23,3 + bld r18,4 + ldd r0,Z+17 + eor r0,r18 + std Z+17,r0 + mov r18,r1 + bst r23,7 + bld r18,5 + ldd r0,Z+20 + eor r0,r18 + std Z+20,r0 + mov r18,r1 + bst r22,5 + bld r18,1 + ldd r0,Z+21 + eor r0,r18 + std Z+21,r0 + mov r18,r1 + bst r20,1 + bld r18,0 + ldd r0,Z+22 + eor r0,r18 + std Z+22,r0 + mov r18,r1 + bst r21,3 + bld r18,0 + bst r22,6 + bld r18,5 + bst r23,1 + bld r18,6 + ldd r0,Z+23 + eor r0,r18 + std Z+23,r0 + mov r18,r1 + bst r20,6 + bld r18,5 + ldd r0,Z+24 + eor r0,r18 + std Z+24,r0 + mov r18,r1 + bst r22,3 + bld r18,3 + bst r21,1 + bld r18,5 + ldd r0,Z+26 + eor r0,r18 + std Z+26,r0 + mov r18,r1 + bst r21,2 + bld r18,7 + ldd r0,Z+27 + eor r0,r18 + std Z+27,r0 + mov r18,r1 + bst r23,4 + bld r18,1 + ldd r0,Z+28 + eor r0,r18 + std Z+28,r0 + mov r18,r1 + bst r20,7 + bld r18,2 + ldd r0,Z+29 + eor r0,r18 + std Z+29,r0 + mov r18,r1 + bst r22,0 + bld r18,1 + ldd r0,Z+30 + eor r0,r18 + std Z+30,r0 + mov r18,r1 + bst r20,4 + bld r18,1 + ldd r0,Z+31 + eor r0,r18 + std Z+31,r0 + ret + .size subterranean_absorb_word, .-subterranean_absorb_word + + .text +.global subterranean_extract + .type subterranean_extract, @function +subterranean_extract: + movw r30,r24 +.L__stack_usage = 2 + ld r26,Z + bst r26,1 + bld r22,0 + bst r26,2 + bld r23,4 + bst r26,4 + bld r25,0 + ldd r26,Z+1 + bst r26,0 + bld r18,4 + bst r26,3 + bld r24,1 + bst r26,7 + bld r23,6 + ldd r26,Z+2 + bst r26,0 + bld r20,0 + bst r26,1 + bld r25,6 + bst r26,6 + bld r25,5 + bst r26,7 + bld r18,7 + ldd r26,Z+3 + bst r26,6 + bld r25,2 + ldd r26,Z+4 + bst r26,0 + bld r21,4 + bst r26,2 + bld r19,2 + bst r26,3 + bld r22,3 + ldd r26,Z+5 + bst r26,4 + bld r19,1 + bst r26,6 + bld r20,3 + ldd r26,Z+7 + bst r26,4 + bld r18,6 + ldd r26,Z+8 + bst r26,0 + bld r23,0 + bst r26,3 + bld r21,1 + bst r26,4 + bld r20,6 + bst r26,6 + bld r23,7 + ldd r26,Z+9 + bst r26,1 + bld r19,3 + ldd r26,Z+10 + bst r26,1 + bld r18,1 + ldd r26,Z+11 + bst r26,0 + bld r20,5 + bst r26,4 + bld r21,7 + bst r26,7 + bld r23,5 + ldd r26,Z+13 + bst r26,7 + bld r24,7 + ldd r26,Z+14 + bst r26,5 + bld r21,3 + ldd r26,Z+15 + bst r26,0 + bld r20,2 + bst r26,1 + bld r18,2 + bst r26,3 + 
bld r18,5 + ldd r26,Z+16 + bst r26,0 + bld r24,4 + bst r26,1 + bld r20,4 + bst r26,6 + bld r22,5 + ldd r26,Z+17 + bst r26,0 + bld r22,2 + bst r26,1 + bld r24,2 + bst r26,4 + bld r25,3 + ldd r26,Z+18 + bst r26,2 + bld r20,7 + ldd r26,Z+20 + bst r26,2 + bld r19,5 + bst r26,5 + bld r25,7 + ldd r26,Z+21 + bst r26,1 + bld r24,5 + ldd r26,Z+22 + bst r26,0 + bld r22,1 + ldd r26,Z+23 + bst r26,0 + bld r23,3 + bst r26,3 + bld r19,7 + bst r26,5 + bld r24,6 + bst r26,6 + bld r25,1 + ldd r26,Z+24 + bst r26,1 + bld r19,0 + bst r26,5 + bld r22,6 + ldd r26,Z+26 + bst r26,3 + bld r24,3 + bst r26,5 + bld r23,1 + ldd r26,Z+27 + bst r26,6 + bld r18,3 + bst r26,7 + bld r23,2 + ldd r26,Z+28 + bst r26,1 + bld r25,4 + bst r26,3 + bld r21,2 + ldd r26,Z+29 + bst r26,2 + bld r22,7 + bst r26,3 + bld r21,5 + ldd r26,Z+30 + bst r26,0 + bld r21,6 + bst r26,1 + bld r24,0 + bst r26,2 + bld r19,6 + bst r26,6 + bld r20,1 + ldd r26,Z+31 + bst r26,1 + bld r22,4 + bst r26,5 + bld r21,0 + bst r26,7 + bld r19,4 + ldd r26,Z+32 + bst r26,0 + bld r18,0 + eor r22,r18 + eor r23,r19 + eor r24,r20 + eor r25,r21 + ret + .size subterranean_extract, .-subterranean_extract + +#endif diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.c b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.c index 1cb64e2..71b1c4c 100644 --- a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.c +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.c @@ -23,6 +23,8 @@ #include "internal-subterranean.h" #include <string.h> +#if !defined(__AVR__) + void subterranean_round(subterranean_state_t *state) { uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8; @@ -168,28 +170,10 @@ void subterranean_round(subterranean_state_t *state) state->x[8] = BDN(x7, 21, 0); } -void subterranean_blank(subterranean_state_t *state) -{ - unsigned round; - for (round = 0; round < 8; ++round) { - subterranean_round(state); - state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ - } -} - -void subterranean_duplex_0(subterranean_state_t *state) -{ - subterranean_round(state); - state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ -} - -void subterranean_duplex_1(subterranean_state_t *state, unsigned char data) +void subterranean_absorb_1(subterranean_state_t *state, unsigned char data) { uint32_t x = data; - /* Perform a single Subterranean round before absorbing the bits */ - subterranean_round(state); - /* Rearrange the bits and absorb them into the state */ state->x[0] ^= (x << 1) & 0x00000002U; state->x[1] ^= x & 0x00000008U; @@ -200,13 +184,10 @@ void subterranean_duplex_1(subterranean_state_t *state, unsigned char data) state->x[7] ^= ((x << 21) & 0x02000000U) ^ ((x << 3) & 0x00000400U); } -void subterranean_duplex_word(subterranean_state_t *state, uint32_t x) +void subterranean_absorb_word(subterranean_state_t *state, uint32_t x) { uint32_t y; - /* Perform a single Subterranean round before absorbing the bits */ - subterranean_round(state); - /* To absorb the word into the state, we first rearrange the source * bits to be in the right target bit positions. Then we mask and * XOR them into the relevant words of the state.
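For orientation, this refactoring splits every duplex call into an explicit subterranean_round() followed by a plain absorb, with the padding bit placed just past the data: state bit 1 for an empty block, the 17th input bit for 2 bytes, the 25th for 3, and the 33rd (folded into x[8]) for a full 4-byte word. A minimal caller-side sketch of the resulting pattern, using the names introduced by this patch; absorb_message itself is a hypothetical helper that mirrors the patched subterranean_absorb():

    /* Sketch only: how the round-then-absorb split composes. */
    static void absorb_message(subterranean_state_t *state,
                               const unsigned char *data,
                               unsigned long long len)
    {
        while (len >= 4) {
            /* duplex_4 = round + absorb word + padding bit in x[8] */
            subterranean_duplex_4(state, le_load_word32(data));
            data += 4;
            len -= 4;
        }
        /* 0..3 trailing bytes: duplex_n runs the round itself and
         * pads just past the data (state bit 1 when len == 0) */
        subterranean_duplex_n(state, data, (unsigned)len);
    }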
@@ -258,39 +239,6 @@ void subterranean_duplex_word(subterranean_state_t *state, uint32_t x) state->x[7] ^= (y & 0x02000400U) ^ (x & 0x00020002U); } -void subterranean_duplex_n - (subterranean_state_t *state, const unsigned char *data, unsigned len) -{ - switch (len) { - case 0: - subterranean_duplex_0(state); - break; - case 1: - subterranean_duplex_1(state, data[0]); - break; - case 2: - /* Load 16 bits and add the padding bit to the 17th bit */ - subterranean_duplex_word - (state, ((uint32_t)(data[0]) | - (((uint32_t)(data[1])) << 8) | - 0x10000U)); - break; - case 3: - /* Load 24 bits and add the padding bit to the 25th bit */ - subterranean_duplex_word - (state, ((uint32_t)(data[0]) | - (((uint32_t)(data[1])) << 8) | - (((uint32_t)(data[2])) << 16) | - 0x01000000U)); - break; - default: - /* Load 32 bits and add the padding bit to the 33rd bit */ - subterranean_duplex_word(state, le_load_word32(data)); - state->x[8] ^= 0x00000001U; - break; - } -} - uint32_t subterranean_extract(subterranean_state_t *state) { uint32_t x, y; @@ -399,12 +347,57 @@ uint32_t subterranean_extract(subterranean_state_t *state) return y ^ state->x[8]; } +#endif /* !__AVR__ */ + +void subterranean_blank(subterranean_state_t *state) +{ + unsigned round; + for (round = 0; round < 8; ++round) { + subterranean_round(state); + state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ + } +} + +void subterranean_duplex_n + (subterranean_state_t *state, const unsigned char *data, unsigned len) +{ + subterranean_round(state); + switch (len) { + case 0: + state->x[0] ^= 0x02; /* padding for an empty block */ + break; + case 1: + subterranean_absorb_1(state, data[0]); + break; + case 2: + /* Load 16 bits and add the padding bit to the 17th bit */ + subterranean_absorb_word + (state, ((uint32_t)(data[0]) | + (((uint32_t)(data[1])) << 8) | + 0x10000U)); + break; + case 3: + /* Load 24 bits and add the padding bit to the 25th bit */ + subterranean_absorb_word + (state, ((uint32_t)(data[0]) | + (((uint32_t)(data[1])) << 8) | + (((uint32_t)(data[2])) << 16) | + 0x01000000U)); + break; + default: + /* Load 32 bits and add the padding bit to the 33rd bit */ + subterranean_absorb_word(state, le_load_word32(data)); + state->x[8] ^= 0x00000001U; + break; + } +} + void subterranean_absorb (subterranean_state_t *state, const unsigned char *data, unsigned long long len) { while (len >= 4) { - subterranean_duplex_4(state, data); + subterranean_duplex_4(state, le_load_word32(data)); data += 4; len -= 4; } diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.h b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.h index 71cebb2..8ebbd30 100644 --- a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.h +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.h @@ -28,6 +28,8 @@ /** * \file internal-subterranean.h * \brief Internal implementation of the Subterranean block operation. + * + * References: https://cs.ru.nl/~joan/subterranean.html */ #ifdef __cplusplus @@ -66,7 +68,19 @@ void subterranean_blank(subterranean_state_t *state); * * \param state Subterranean state to be transformed. */ -void subterranean_duplex_0(subterranean_state_t *state); +#define subterranean_duplex_0(state) \ + do { \ + subterranean_round((state)); \ + (state)->x[0] ^= 2; /* padding for an empty block */ \ + } while (0) + +/** + * \brief Absorbs a single byte into the Subterranean state. 
+ * + * \param state Subterranean state to be transformed. + * \param data The single byte to be absorbed. + */ +void subterranean_absorb_1(subterranean_state_t *state, unsigned char data); /** * \brief Performs a single Subterranean round and absorbs one byte. @@ -74,7 +88,11 @@ void subterranean_duplex_0(subterranean_state_t *state); * \param state Subterranean state to be transformed. * \param data The single byte to be absorbed. */ -void subterranean_duplex_1(subterranean_state_t *state, unsigned char data); +#define subterranean_duplex_1(state, data) \ + do { \ + subterranean_round((state)); \ + subterranean_absorb_1((state), (data)); \ + } while (0) /** * \brief Absorbs a 32-bit word into the Subterranean state. @@ -82,17 +100,30 @@ void subterranean_duplex_1(subterranean_state_t *state, unsigned char data); * \param state Subterranean state to be transformed. * \param x The word to absorb into the state. */ -void subterranean_duplex_word(subterranean_state_t *state, uint32_t x); +void subterranean_absorb_word(subterranean_state_t *state, uint32_t x); + +/** + * \brief Absorbs a 32-bit word into the Subterranean state after performing + * the round function. + * + * \param state Subterranean state to be transformed. + * \param x The word to absorb into the state. + */ +#define subterranean_duplex_word(state, x) \ + do { \ + subterranean_round((state)); \ + subterranean_absorb_word((state), (x)); \ + } while (0) /** * \brief Performs a single Subterranean round and absorbs four bytes. * * \param state Subterranean state to be transformed. - * \param data Points to the four data bytes to be absorbed. + * \param data 32-bit word containing the four data bytes to be absorbed. */ #define subterranean_duplex_4(state, data) \ do { \ - subterranean_duplex_word((state), le_load_word32((data))); \ + subterranean_duplex_word((state), (data)); \ (state)->x[8] ^= 1; \ } while (0) diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.c b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.c index 1bc9fc4..aad147a 100644 --- a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.c +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.c @@ -75,8 +75,7 @@ int subterranean_aead_encrypt while (mlen >= 4) { x1 = le_load_word32(m); x2 = subterranean_extract(&state) ^ x1; - subterranean_duplex_word(&state, x1); - state.x[8] ^= 1; /* padding for 32-bit blocks */ + subterranean_duplex_4(&state, x1); le_store_word32(c, x2); c += 4; m += 4; @@ -146,8 +145,7 @@ int subterranean_aead_decrypt while (clen >= 4) { x = le_load_word32(c); x ^= subterranean_extract(&state); - subterranean_duplex_word(&state, x); - state.x[8] ^= 1; /* padding for 32-bit blocks */ + subterranean_duplex_4(&state, x); le_store_word32(m, x); c += 4; m += 4; diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.h b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.h index 148e5e8..3b35b42 100644 --- a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.h +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.h @@ -38,6 +38,8 @@ * * The Subterranean permutation is intended for hardware implementation. * It is not structured for efficient software implementation. 
+ * + * References: https://cs.ru.nl/~joan/subterranean.html */ #ifdef __cplusplus diff --git a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage-avr.S b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage-avr.S new file mode 100644 index 0000000..4b6c72f --- /dev/null +++ b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage-avr.S @@ -0,0 +1,1411 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 46 + .byte 28 + .byte 109 + .byte 43 + .byte 53 + .byte 7 + .byte 127 + .byte 59 + .byte 40 + .byte 8 + .byte 11 + .byte 95 + .byte 49 + .byte 17 + .byte 27 + .byte 77 + .byte 110 + .byte 84 + .byte 13 + .byte 9 + .byte 31 + .byte 69 + .byte 117 + .byte 83 + .byte 106 + .byte 93 + .byte 97 + .byte 0 + .byte 4 + .byte 120 + .byte 6 + .byte 30 + .byte 55 + .byte 111 + .byte 47 + .byte 73 + .byte 100 + .byte 52 + .byte 125 + .byte 25 + .byte 57 + .byte 51 + .byte 67 + .byte 87 + .byte 96 + .byte 98 + .byte 19 + .byte 5 + .byte 119 + .byte 71 + .byte 79 + .byte 75 + .byte 29 + .byte 45 + .byte 36 + .byte 72 + .byte 116 + .byte 88 + .byte 37 + .byte 94 + .byte 90 + .byte 118 + .byte 65 + .byte 66 + .byte 39 + .byte 62 + .byte 108 + .byte 1 + .byte 44 + .byte 60 + .byte 78 + .byte 26 + .byte 33 + .byte 42 + .byte 10 + .byte 85 + .byte 58 + .byte 56 + .byte 24 + .byte 126 + .byte 12 + .byte 99 + .byte 103 + .byte 86 + .byte 80 + .byte 124 + .byte 50 + .byte 122 + .byte 104 + .byte 2 + .byte 107 + .byte 23 + .byte 123 + .byte 89 + .byte 113 + .byte 15 + .byte 48 + .byte 16 + .byte 34 + .byte 61 + .byte 64 + .byte 105 + .byte 82 + .byte 20 + .byte 54 + .byte 68 + .byte 70 + .byte 3 + .byte 22 + .byte 101 + .byte 102 + .byte 114 + .byte 18 + .byte 14 + .byte 41 + .byte 74 + .byte 76 + .byte 112 + .byte 21 + .byte 38 + .byte 121 + .byte 81 + .byte 35 + .byte 63 + .byte 115 + .byte 91 + .byte 32 + .byte 92 + .byte 0 + .byte 18 + .byte 10 + .byte 75 + .byte 102 + .byte 12 + .byte 72 + .byte 115 + .byte 121 + .byte 62 + .byte 97 + .byte 81 + .byte 1 + .byte 21 + .byte 23 + .byte 14 + .byte 126 + .byte 51 + .byte 104 + .byte 54 + .byte 66 + .byte 53 + .byte 55 + .byte 94 + .byte 83 + .byte 76 + .byte 63 + .byte 84 + .byte 88 + .byte 110 + .byte 86 + .byte 42 + .byte 29 + .byte 37 + .byte 109 + .byte 101 + .byte 91 + .byte 113 + .byte 47 + .byte 32 + .byte 6 + .byte 24 + .byte 41 + .byte 58 + .byte 13 + .byte 122 + .byte 108 + .byte 27 + .byte 25 + .byte 67 + .byte 112 + .byte 65 + .byte 73 + .byte 34 + .byte 119 + .byte 96 + .byte 79 + .byte 69 + .byte 85 + .byte 2 + .byte 99 + .byte 71 + .byte 117 + .byte 45 + .byte 64 + .byte 70 + .byte 125 + .byte 92 + .byte 124 + .byte 89 + .byte 38 + .byte 11 + .byte 9 + .byte 3 + .byte 87 + .byte 93 + .byte 39 + .byte 120 + .byte 48 + .byte 46 + .byte 68 + .byte 82 + .byte 59 + .byte 8 + .byte 103 + .byte 44 + .byte 5 + .byte 107 + .byte 43 + .byte 26 + .byte 33 + .byte 56 + .byte 7 + .byte 15 + .byte 74 + .byte 17 + .byte 80 + .byte 106 + .byte 40 + .byte 49 + .byte 16 + .byte 77 + .byte 95 + .byte 114 + .byte 57 + .byte 22 + .byte 90 + .byte 19 + .byte 4 + .byte 60 + .byte 52 + .byte 31 + .byte 118 + .byte 30 + .byte 20 + .byte 35 + .byte 28 + .byte 50 + .byte 78 + .byte 123 + .byte 36 + .byte 116 + .byte 127 + .byte 61 + .byte 105 + .byte 100 + .byte 98 + .byte 111 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 222 +table_1: + .byte 127 + 
.byte 63 + .byte 31 + .byte 15 + .byte 7 + .byte 3 + .byte 1 + .byte 64 + .byte 32 + .byte 16 + .byte 8 + .byte 4 + .byte 2 + .byte 65 + .byte 96 + .byte 48 + .byte 24 + .byte 12 + .byte 6 + .byte 67 + .byte 33 + .byte 80 + .byte 40 + .byte 20 + .byte 10 + .byte 69 + .byte 98 + .byte 113 + .byte 120 + .byte 60 + .byte 30 + .byte 79 + .byte 39 + .byte 19 + .byte 9 + .byte 68 + .byte 34 + .byte 81 + .byte 104 + .byte 52 + .byte 26 + .byte 77 + .byte 102 + .byte 115 + .byte 57 + .byte 92 + .byte 46 + .byte 87 + .byte 43 + .byte 21 + .byte 74 + .byte 101 + .byte 114 + .byte 121 + .byte 124 + .byte 62 + .byte 95 + .byte 47 + .byte 23 + .byte 11 + .byte 5 + .byte 66 + .byte 97 + .byte 112 + .byte 56 + .byte 28 + .byte 14 + .byte 71 + .byte 35 + .byte 17 + .byte 72 + .byte 36 + .byte 18 + .byte 73 + .byte 100 + .byte 50 + .byte 89 + .byte 108 + .byte 54 + .byte 91 + .byte 45 + .byte 86 + .byte 107 + .byte 53 + .byte 90 + .byte 109 + .byte 118 + .byte 123 + .byte 61 + .byte 94 + .byte 111 + .byte 55 + .byte 27 + .byte 13 + .byte 70 + .byte 99 + .byte 49 + .byte 88 + .byte 44 + .byte 22 + .byte 75 + .byte 37 + .byte 82 + .byte 105 + .byte 116 + .byte 58 + .byte 93 + .byte 110 + .byte 119 + .byte 59 + .byte 29 + .byte 78 + .byte 103 + .byte 51 + .byte 25 + .byte 76 + .byte 38 + .byte 83 + .byte 41 + .byte 84 + .byte 42 + .byte 85 + .byte 106 + .byte 117 + .byte 122 + .byte 125 + .byte 126 + .byte 127 + .byte 63 + .byte 31 + .byte 15 + .byte 7 + .byte 3 + .byte 1 + .byte 64 + .byte 32 + .byte 16 + .byte 8 + .byte 4 + .byte 2 + .byte 65 + .byte 96 + .byte 48 + .byte 24 + .byte 12 + .byte 6 + .byte 67 + .byte 33 + .byte 80 + .byte 40 + .byte 20 + .byte 10 + .byte 69 + .byte 98 + .byte 113 + .byte 120 + .byte 60 + .byte 30 + .byte 79 + .byte 39 + .byte 19 + .byte 9 + .byte 68 + .byte 34 + .byte 81 + .byte 104 + .byte 52 + .byte 26 + .byte 77 + .byte 102 + .byte 115 + .byte 57 + .byte 92 + .byte 46 + .byte 87 + .byte 43 + .byte 21 + .byte 74 + .byte 101 + .byte 114 + .byte 121 + .byte 124 + .byte 62 + .byte 95 + .byte 47 + .byte 23 + .byte 11 + .byte 5 + .byte 66 + .byte 97 + .byte 112 + .byte 56 + .byte 28 + .byte 14 + .byte 71 + .byte 35 + .byte 17 + .byte 72 + .byte 36 + .byte 18 + .byte 73 + .byte 100 + .byte 50 + .byte 89 + .byte 108 + .byte 54 + .byte 91 + .byte 45 + .byte 86 + .byte 107 + .byte 53 + .byte 90 + .byte 109 + .byte 118 + .byte 123 + .byte 61 + .byte 94 + .byte 111 + .byte 55 + .byte 27 + .byte 13 + .byte 70 + + .text +.global wage_permute + .type wage_permute, @function +wage_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,37 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 55 + ld r20,Z + ldd r21,Z+1 + ldd r22,Z+2 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + ldd r20,Z+3 + ldd r21,Z+4 + ldd r22,Z+5 + std Y+4,r20 + std Y+5,r21 + std Y+6,r22 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+8 + std Y+7,r20 + std Y+8,r21 + std Y+9,r22 + ldd r20,Z+9 + ldd r21,Z+10 + ldd r22,Z+11 + std Y+10,r20 + std Y+11,r21 + std Y+12,r22 + ldd r20,Z+12 + ldd r21,Z+13 + ldd r22,Z+14 + std Y+13,r20 + std Y+14,r21 + std Y+15,r22 + ldd r20,Z+15 + ldd r21,Z+16 + ldd r22,Z+17 + std Y+16,r20 + std Y+17,r21 + std Y+18,r22 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + std Y+19,r20 + std Y+20,r21 + std Y+21,r22 + ldd r20,Z+21 + ldd r21,Z+22 + ldd r22,Z+23 + std Y+22,r20 + std Y+23,r21 + 
std Y+24,r22 + ldd r20,Z+24 + ldd r21,Z+25 + ldd r22,Z+26 + std Y+25,r20 + std Y+26,r21 + std Y+27,r22 + ldd r20,Z+27 + ldd r21,Z+28 + ldd r22,Z+29 + std Y+28,r20 + std Y+29,r21 + std Y+30,r22 + ldd r20,Z+30 + ldd r21,Z+31 + ldd r22,Z+32 + std Y+31,r20 + std Y+32,r21 + std Y+33,r22 + ldd r20,Z+33 + ldd r21,Z+34 + ldd r22,Z+35 + std Y+34,r20 + std Y+35,r21 + std Y+36,r22 + ldd r20,Z+36 + std Y+37,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r23,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r19,r1 +78: + ldd r20,Y+1 + mov r18,r1 + lsr r20 + sbc r18,r1 + andi r18,120 + eor r20,r18 + ldd r23,Y+7 + eor r20,r23 + ldd r26,Y+9 + eor r20,r26 + ldd r27,Y+13 + eor r20,r27 + ldd r2,Y+14 + eor r20,r2 + ldd r3,Y+20 + eor r20,r3 + ldd r4,Y+25 + eor r20,r4 + ldd r5,Y+27 + eor r20,r5 + ldd r6,Y+31 + eor r20,r6 + ldd r7,Y+32 + eor r20,r7 + ldd r21,Y+2 + mov r18,r1 + lsr r21 + sbc r18,r1 + andi r18,120 + eor r21,r18 + ldd r8,Y+8 + eor r21,r8 + ldd r9,Y+10 + eor r21,r9 + eor r21,r2 + ldd r10,Y+15 + eor r21,r10 + ldd r11,Y+21 + eor r21,r11 + ldd r12,Y+26 + eor r21,r12 + ldd r13,Y+28 + eor r21,r13 + eor r21,r7 + ldd r14,Y+33 + eor r21,r14 + ldd r22,Y+3 + mov r18,r1 + lsr r22 + sbc r18,r1 + andi r18,120 + eor r22,r18 + eor r22,r26 + ldd r15,Y+11 + eor r22,r15 + eor r22,r10 + ldd r24,Y+16 + eor r22,r24 + ldd r25,Y+22 + eor r22,r25 + eor r22,r5 + ldd r16,Y+29 + eor r22,r16 + eor r22,r14 + ldd r17,Y+34 + eor r22,r17 + mov r30,r26 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r23,Y+6 + eor r23,r18 + mov r30,r9 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r27,Y+7 + eor r27,r18 + mov r30,r15 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r8,r18 + mov r30,r24 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r3,Y+12 + eor r3,r18 + ldd r4,Y+17 + mov r30,r4 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r6,Y+13 + eor r6,r18 + ldd r2,Y+18 + mov r30,r2 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r11,Y+14 + eor r11,r18 + mov r30,r13 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r12,Y+25 + eor r12,r18 + mov r30,r16 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r7,Y+26 + eor r7,r18 + ldd r10,Y+30 + mov r30,r10 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r5,r18 + ldd r25,Y+35 + mov r30,r25 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r14,Y+31 + eor r14,r18 + ldd r17,Y+36 + mov r30,r17 +#if defined(RAMPZ) + elpm r18,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r26,Y+32 + eor r26,r18 + ldd r9,Y+37 + mov r30,r9 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r15,Y+33 + eor r15,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r24,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r24 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r20,r18 + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r21,r18 + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r22,r18 + inc r19 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r16,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r16 +#endif + ldd r16,Y+19 + mov r30,r16 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r13,Y+20 + eor r13,r18 + eor r13,r2 + mov r30,r13 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r2,Y+21 + eor r2,r18 + eor r2,r4 + mov r30,r2 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r4,Y+22 + eor r4,r18 + eor r4,r24 + mov r30,r9 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r20,r18 + mov r30,r20 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r21,r18 + mov r30,r21 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r22,r18 + ldd r18,Y+4 + std Y+1,r18 + ldd r18,Y+5 + std Y+2,r18 + std Y+3,r23 + std Y+4,r27 + std Y+5,r8 + ldd r18,Y+9 + std Y+6,r18 + ldd r18,Y+10 + std Y+7,r18 + ldd r18,Y+11 + std Y+8,r18 + std Y+9,r3 + std Y+10,r6 + std Y+11,r11 + ldd r18,Y+15 + std Y+12,r18 + ldd r18,Y+16 + std Y+13,r18 + ldd r18,Y+17 + std Y+14,r18 + ldd r18,Y+18 + std Y+15,r18 + std Y+16,r16 + std Y+17,r13 + std Y+18,r2 + std Y+19,r4 + ldd r18,Y+23 + std Y+20,r18 + ldd r18,Y+24 + std Y+21,r18 + std Y+22,r12 + std Y+23,r7 + std Y+24,r5 + ldd r18,Y+28 + std Y+25,r18 + ldd r18,Y+29 + std Y+26,r18 + std Y+27,r10 + 
std Y+28,r14 + std Y+29,r26 + std Y+30,r15 + ldd r18,Y+34 + std Y+31,r18 + std Y+32,r25 + std Y+33,r17 + std Y+34,r9 + std Y+35,r20 + std Y+36,r21 + std Y+37,r22 + ldi r27,222 + cpse r19,r27 + rjmp 78b +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + st Z,r20 + std Z+1,r21 + std Z+2,r22 + ldd r20,Y+4 + ldd r21,Y+5 + ldd r22,Y+6 + std Z+3,r20 + std Z+4,r21 + std Z+5,r22 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+9 + std Z+6,r20 + std Z+7,r21 + std Z+8,r22 + ldd r20,Y+10 + ldd r21,Y+11 + ldd r22,Y+12 + std Z+9,r20 + std Z+10,r21 + std Z+11,r22 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + std Z+12,r20 + std Z+13,r21 + std Z+14,r22 + ldd r20,Y+16 + ldd r21,Y+17 + ldd r22,Y+18 + std Z+15,r20 + std Z+16,r21 + std Z+17,r22 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r22,Y+21 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + ldd r20,Y+22 + ldd r21,Y+23 + ldd r22,Y+24 + std Z+21,r20 + std Z+22,r21 + std Z+23,r22 + ldd r20,Y+25 + ldd r21,Y+26 + ldd r22,Y+27 + std Z+24,r20 + std Z+25,r21 + std Z+26,r22 + ldd r20,Y+28 + ldd r21,Y+29 + ldd r22,Y+30 + std Z+27,r20 + std Z+28,r21 + std Z+29,r22 + ldd r20,Y+31 + ldd r21,Y+32 + ldd r22,Y+33 + std Z+30,r20 + std Z+31,r21 + std Z+32,r22 + ldd r20,Y+34 + ldd r21,Y+35 + ldd r22,Y+36 + std Z+33,r20 + std Z+34,r21 + std Z+35,r22 + ldd r20,Y+37 + std Z+36,r20 + adiw r28,37 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size wage_permute, .-wage_permute + + .text +.global wage_absorb + .type wage_absorb, @function +wage_absorb: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r22,X+ + ld r21,X+ + ld r20,X+ + ld r19,X+ + mov r18,r1 + lsr r22 + ror r21 + ror r20 + ror r19 + ror r18 + ldd r0,Z+8 + eor r0,r22 + std Z+8,r0 + lsr r21 + ror r20 + ror r19 + ror r18 + ldd r0,Z+9 + eor r0,r21 + std Z+9,r0 + lsr r20 + ror r19 + ror r18 + ldd r0,Z+15 + eor r0,r20 + std Z+15,r0 + lsr r19 + ror r18 + ldd r0,Z+16 + eor r0,r19 + std Z+16,r0 + lsr r18 + ldd r0,Z+18 + eor r0,r18 + std Z+18,r0 + ld r22,X+ + ld r21,X+ + ld r20,X+ + ld r19,X+ + mov r23,r1 + mov r18,r1 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + ldd r0,Z+18 + eor r0,r23 + std Z+18,r0 + lsr r22 + ror r21 + ror r20 + ror r19 + ldd r0,Z+27 + eor r0,r22 + std Z+27,r0 + lsr r21 + ror r20 + ror r19 + ldd r0,Z+28 + eor r0,r21 + std Z+28,r0 + lsr r20 + ror r19 + ldd r0,Z+34 + eor r0,r20 + std Z+34,r0 + lsr r19 + ror r18 + ldd r0,Z+35 + eor r0,r19 + std Z+35,r0 + lsr r18 + ldd r0,Z+36 + eor r0,r18 + std Z+36,r0 + ret + .size wage_absorb, .-wage_absorb + + .text +.global wage_get_rate + .type wage_get_rate, @function +wage_get_rate: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ldd r21,Z+8 + ldd r20,Z+9 + ldd r19,Z+15 + ldd r18,Z+16 + lsl r18 + lsl r18 + rol r19 + lsl r18 + rol r19 + rol r20 + lsl r18 + rol r19 + rol r20 + rol r21 + ldd r22,Z+18 + lsr r22 + lsr r22 + lsr r22 + or r18,r22 + st X+,r21 + st X+,r20 + st X+,r19 + st X+,r18 + ldd r21,Z+18 + ldd r20,Z+27 + ldd r19,Z+28 + ldd r18,Z+34 + lsl r18 + lsl r18 + rol r19 + lsl r18 + rol r19 + rol r20 + lsr r21 + ror r20 + ror r19 + ror r18 + lsr r21 + ror r20 + ror r19 + ror r18 + lsr r21 + ror r20 + ror r19 + ror r18 + st X+,r20 + st X+,r19 + st X+,r18 + ldd r19,Z+35 + ldd r18,Z+36 + lsl r18 + 
lsl r18 + rol r19 + st X+,r19 + ret + .size wage_get_rate, .-wage_get_rate + + .text +.global wage_set_rate + .type wage_set_rate, @function +wage_set_rate: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r22,X+ + ld r21,X+ + ld r20,X+ + ld r19,X+ + mov r18,r1 + lsr r22 + ror r21 + ror r20 + ror r19 + ror r18 + std Z+8,r22 + lsr r21 + ror r20 + ror r19 + ror r18 + std Z+9,r21 + lsr r20 + ror r19 + ror r18 + std Z+15,r20 + lsr r19 + ror r18 + std Z+16,r19 + lsr r18 + std Z+18,r18 + ld r22,X+ + ld r21,X+ + ld r20,X+ + ld r19,X+ + mov r23,r1 + mov r18,r1 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + ldd r0,Z+18 + eor r0,r23 + std Z+18,r0 + lsr r22 + ror r21 + ror r20 + ror r19 + std Z+27,r22 + lsr r21 + ror r20 + ror r19 + std Z+28,r21 + lsr r20 + ror r19 + std Z+34,r20 + lsr r19 + ror r18 + std Z+35,r19 + lsr r18 + ldd r24,Z+36 + andi r24,63 + eor r24,r18 + std Z+36,r24 + ret + .size wage_set_rate, .-wage_set_rate + +#endif diff --git a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.c b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.c index e9528c9..6741643 100644 --- a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.c +++ b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.c @@ -33,6 +33,8 @@ */ #define WAGE_64BIT 1 +#if !defined(__AVR__) + /** * \brief RC0 and RC1 round constants for WAGE, interleaved with each other. */ @@ -287,8 +289,7 @@ void wage_permute(unsigned char s[WAGE_STATE_SIZE]) /* 7-bit components for the rate: 8, 9, 15, 16, 18, 27, 28, 34, 35, 36 */ void wage_absorb - (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8], - unsigned char domain) + (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8]) { uint32_t temp; temp = be_load_word32(data); @@ -304,7 +305,6 @@ void wage_absorb s[34] ^= (unsigned char)((temp >> 8) & 0x7F); s[35] ^= (unsigned char)((temp >> 1) & 0x7F); s[36] ^= (unsigned char)((temp << 6) & 0x7F); - s[0] ^= domain; } void wage_get_rate @@ -327,8 +327,7 @@ void wage_get_rate } void wage_set_rate - (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8], - unsigned char domain) + (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8]) { uint32_t temp; temp = be_load_word32(data); @@ -344,9 +343,10 @@ void wage_set_rate s[34] = (unsigned char)((temp >> 8) & 0x7F); s[35] = (unsigned char)((temp >> 1) & 0x7F); s[36] = (unsigned char)(((temp << 6) & 0x40) ^ (s[36] & 0x3F)); - s[0] ^= domain; } +#endif /* !__AVR__ */ + /** * \brief Converts a 128-bit value into an array of 7-bit components. * diff --git a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.h b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.h index a0d23d7..2663e72 100644 --- a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.h +++ b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.h @@ -55,11 +55,9 @@ void wage_permute(unsigned char s[WAGE_STATE_SIZE]); * * \param s The WAGE state to be permuted. * \param data The data to be absorbed. - * \param domain The domain separator for the absorbed data. */ void wage_absorb - (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8], - unsigned char domain); + (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8]); /** * \brief Gets the 8 bytes of the rate from the WAGE state. @@ -75,11 +73,9 @@ void wage_get_rate * * \param s The WAGE state to set the rate in. 
* \param data Points to the bytes to set into the rate. - * \param domain The domain separator for the rate data. */ void wage_set_rate - (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8], - unsigned char domain); + (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8]); /** * \brief Absorbs 16 key bytes into the WAGE state. diff --git a/wage/Implementations/crypto_aead/wageae128v1/rhys/wage.c b/wage/Implementations/crypto_aead/wageae128v1/rhys/wage.c index 374409b..bede1c0 100644 --- a/wage/Implementations/crypto_aead/wageae128v1/rhys/wage.c +++ b/wage/Implementations/crypto_aead/wageae128v1/rhys/wage.c @@ -55,7 +55,8 @@ static void wage_process_ad /* Process as many full blocks as possible */ while (adlen >= WAGE_RATE) { - wage_absorb(state, ad, 0x40); + wage_absorb(state, ad); + state[0] ^= 0x40; wage_permute(state); ad += WAGE_RATE; adlen -= WAGE_RATE; @@ -66,7 +67,8 @@ static void wage_process_ad memcpy(pad, ad, temp); pad[temp] = 0x80; memset(pad + temp + 1, 0, WAGE_RATE - temp - 1); - wage_absorb(state, pad, 0x40); + wage_absorb(state, pad); + state[0] ^= 0x40; wage_permute(state); } @@ -95,7 +97,8 @@ int wage_aead_encrypt while (mlen >= WAGE_RATE) { wage_get_rate(state, block); lw_xor_block(block, m, WAGE_RATE); - wage_set_rate(state, block, 0x20); + wage_set_rate(state, block); + state[0] ^= 0x20; wage_permute(state); memcpy(c, block, WAGE_RATE); c += WAGE_RATE; @@ -106,7 +109,8 @@ int wage_aead_encrypt wage_get_rate(state, block); lw_xor_block(block, m, temp); block[temp] ^= 0x80; - wage_set_rate(state, block, 0x20); + wage_set_rate(state, block); + state[0] ^= 0x20; wage_permute(state); memcpy(c, block, temp); @@ -145,7 +149,8 @@ int wage_aead_decrypt while (clen >= WAGE_RATE) { wage_get_rate(state, block); lw_xor_block(block, c, WAGE_RATE); - wage_set_rate(state, c, 0x20); + wage_set_rate(state, c); + state[0] ^= 0x20; wage_permute(state); memcpy(m, block, WAGE_RATE); c += WAGE_RATE; @@ -157,7 +162,8 @@ int wage_aead_decrypt lw_xor_block_2_src(block + 8, block, c, temp); memcpy(block, c, temp); block[temp] ^= 0x80; - wage_set_rate(state, block, 0x20); + wage_set_rate(state, block); + state[0] ^= 0x20; wage_permute(state); memcpy(m, block + 8, temp);
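The internal-wage.c and internal-wage.h hunks above implement one refactoring: the portable C definitions of wage_permute(), wage_absorb(), wage_get_rate() and wage_set_rate() are fenced behind #if !defined(__AVR__), so that on AVR the four symbols come from the new internal-wage-avr.S instead, under the same shared prototypes. A skeleton of the pattern, assuming only names visible in the diff (WAGE_STATE_SIZE is 37, the number of 7-bit state components the assembly copies in and out; the placeholder comment stands for the existing portable body):

    /* internal-wage.h: one prototype serves both backends. */
    #define WAGE_STATE_SIZE 37
    void wage_permute(unsigned char s[WAGE_STATE_SIZE]);

    /* internal-wage.c: compile the portable definition only when no
     * platform-specific assembly backend provides the symbol. */
    #if !defined(__AVR__)
    void wage_permute(unsigned char s[WAGE_STATE_SIZE])
    {
        /* portable C body */
    }
    #endif /* !__AVR__ */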
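The wage.c hunks all make the same mechanical change: the domain parameter disappears from wage_absorb()/wage_set_rate(), and each caller XORs the separator (0x40 for associated-data blocks, 0x20 for message blocks) into state[0] itself, exactly what the removed "s[0] ^= domain;" lines used to do, so the generated assembly does not need to carry a rarely-varying argument. A minimal sketch of one iteration of the refactored wage_process_ad() loop; absorb_ad_block is a hypothetical helper name, not part of the library:

    #include "internal-wage.h"

    /* Hypothetical helper: absorb one 8-byte AD block, apply the
     * domain separator at the caller as the refactor requires, then
     * run the WAGE permutation. */
    static void absorb_ad_block(unsigned char state[WAGE_STATE_SIZE],
                                const unsigned char block[8])
    {
        wage_absorb(state, block); /* fold 64 bits into the 7-bit rate cells */
        state[0] ^= 0x40;          /* domain separator, now the caller's job */
        wage_permute(state);
    }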
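For readers skimming the generated assembly: every "#if defined(RAMPZ) elpm ... #elif defined(__AVR_HAVE_LPMX__) lpm ... #elif defined(__AVR_TINY__) ld ... #else lpm ... #endif" ladder in wage_permute is a single byte read from a flash-resident table, either table_0 (apparently two 128-entry 7-bit component tables packed into 256 bytes, since lookups into the upper half set bit 7 of the index with "ori r30,128") or table_1 (the 222 round-constant bytes that the loop counter r19 walks until it reaches 222). In C under avr-libc the same dispatch hides behind pgm_read_byte; a sketch, with the table truncated to its first entries:

    #include <stdint.h>
    #include <avr/pgmspace.h>

    /* Stand-in for the .S file's 256-byte table_0; only the first four
     * entries are reproduced here, the rest default to zero. */
    static const uint8_t table_0[256] PROGMEM = { 46, 28, 109, 43 };

    /* One flash-table lookup, as the elpm/lpm ladders perform it.  On
     * classic AVR this compiles to lpm; tables that could sit above
     * 64 KiB need the elpm/RAMPZ handling the assembly does by hand
     * (pgm_read_byte_far), and AVR-tiny cores read flash with plain ld. */
    static uint8_t table_0_lookup(uint8_t index)
    {
        return pgm_read_byte(&table_0[index]);
    }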