diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/forkae.c
index 4a9671a..49e7610 100644
--- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/forkae.c
+++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/forkae.c
@@ -22,7 +22,6 @@
 #include "forkae.h"
 #include "internal-forkskinny.h"
-#include "internal-util.h"
 #include <string.h>
 
 aead_cipher_t const forkae_paef_64_192_cipher = {
@@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = {
 #define FORKAE_TWEAKEY_REDUCED_SIZE 32
 #define FORKAE_BLOCK_FUNC forkskinny_128_256
 #include "internal-forkae-saef.h"
+
+/* Helper functions to implement the forking encrypt/decrypt block operations
+ * on top of the basic "perform N rounds" functions in internal-forkskinny.c */
+
+/**
+ * \brief Number of rounds of ForkSkinny-128-256 before forking.
+ */
+#define FORKSKINNY_128_256_ROUNDS_BEFORE 21
+
+/**
+ * \brief Number of rounds of ForkSkinny-128-256 after forking.
+ */
+#define FORKSKINNY_128_256_ROUNDS_AFTER 27
+
+void forkskinny_128_256_encrypt
+    (const unsigned char key[32], unsigned char *output_left,
+     unsigned char *output_right, const unsigned char *input)
+{
+    forkskinny_128_256_state_t state;
+
+    /* Unpack the tweakey and the input */
+    state.TK1[0] = le_load_word32(key);
+    state.TK1[1] = le_load_word32(key + 4);
+    state.TK1[2] = le_load_word32(key + 8);
+    state.TK1[3] = le_load_word32(key + 12);
+    state.TK2[0] = le_load_word32(key + 16);
+    state.TK2[1] = le_load_word32(key + 20);
+    state.TK2[2] = le_load_word32(key + 24);
+    state.TK2[3] = le_load_word32(key + 28);
+    state.S[0] = le_load_word32(input);
+    state.S[1] = le_load_word32(input + 4);
+    state.S[2] = le_load_word32(input + 8);
+    state.S[3] = le_load_word32(input + 12);
+
+    /* Run all of the rounds before the forking point */
+    forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE);
+
+    /* Determine which output blocks we need */
+    if (output_left && output_right) {
+        /* We need both outputs so save the state at the forking point */
+        uint32_t F[4];
+        F[0] = state.S[0];
+        F[1] = state.S[1];
+        F[2] = state.S[2];
+        F[3] = state.S[3];
+
+        /* Generate the right output block */
+        forkskinny_128_256_rounds
+            (&state, FORKSKINNY_128_256_ROUNDS_BEFORE,
+             FORKSKINNY_128_256_ROUNDS_BEFORE +
+             FORKSKINNY_128_256_ROUNDS_AFTER);
+        le_store_word32(output_right, state.S[0]);
+        le_store_word32(output_right + 4, state.S[1]);
+        le_store_word32(output_right + 8, state.S[2]);
+        le_store_word32(output_right + 12, state.S[3]);
+
+        /* Restore the state at the forking point */
+        state.S[0] = F[0];
+        state.S[1] = F[1];
+        state.S[2] = F[2];
+        state.S[3] = F[3];
+    }
+    if (output_left) {
+        /* Generate the left output block */
+        state.S[0] ^= 0x08040201U; /* Branching constant */
+        state.S[1] ^= 0x82412010U;
+        state.S[2] ^= 0x28140a05U;
+        state.S[3] ^= 0x8844a251U;
+        forkskinny_128_256_rounds
+            (&state, FORKSKINNY_128_256_ROUNDS_BEFORE +
+                     FORKSKINNY_128_256_ROUNDS_AFTER,
+             FORKSKINNY_128_256_ROUNDS_BEFORE +
+             FORKSKINNY_128_256_ROUNDS_AFTER * 2);
+        le_store_word32(output_left, state.S[0]);
+        le_store_word32(output_left + 4, state.S[1]);
+        le_store_word32(output_left + 8, state.S[2]);
+        le_store_word32(output_left + 12, state.S[3]);
+    } else {
+        /* We only need the right output block */
+        forkskinny_128_256_rounds
+            (&state, FORKSKINNY_128_256_ROUNDS_BEFORE,
+             FORKSKINNY_128_256_ROUNDS_BEFORE +
+             FORKSKINNY_128_256_ROUNDS_AFTER);
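+        /* Note: rounds are numbered globally from 0 to
+         * ROUNDS_BEFORE + ROUNDS_AFTER * 2 - 1.  The right branch always
+         * occupies rounds [BEFORE, BEFORE + AFTER) and the left branch
+         * [BEFORE + AFTER, BEFORE + AFTER * 2), which keeps the round
+         * constants and the tweakey schedule position consistent no
+         * matter which outputs the caller asked for. */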
+        le_store_word32(output_right, state.S[0]);
+        le_store_word32(output_right + 4, state.S[1]);
+        le_store_word32(output_right + 8, state.S[2]);
+        le_store_word32(output_right + 12, state.S[3]);
+    }
+}
+
+void forkskinny_128_256_decrypt
+    (const unsigned char key[32], unsigned char *output_left,
+     unsigned char *output_right, const unsigned char *input)
+{
+    forkskinny_128_256_state_t state;
+    forkskinny_128_256_state_t fstate;
+
+    /* Unpack the tweakey and the input */
+    state.TK1[0] = le_load_word32(key);
+    state.TK1[1] = le_load_word32(key + 4);
+    state.TK1[2] = le_load_word32(key + 8);
+    state.TK1[3] = le_load_word32(key + 12);
+    state.TK2[0] = le_load_word32(key + 16);
+    state.TK2[1] = le_load_word32(key + 20);
+    state.TK2[2] = le_load_word32(key + 24);
+    state.TK2[3] = le_load_word32(key + 28);
+    state.S[0] = le_load_word32(input);
+    state.S[1] = le_load_word32(input + 4);
+    state.S[2] = le_load_word32(input + 8);
+    state.S[3] = le_load_word32(input + 12);
+
+    /* Fast-forward the tweakey to the end of the key schedule */
+    forkskinny_128_256_forward_tk
+        (&state, FORKSKINNY_128_256_ROUNDS_BEFORE +
+                 FORKSKINNY_128_256_ROUNDS_AFTER * 2);
+
+    /* Perform the "after" rounds on the input to get back
+     * to the forking point in the cipher */
+    forkskinny_128_256_inv_rounds
+        (&state, FORKSKINNY_128_256_ROUNDS_BEFORE +
+                 FORKSKINNY_128_256_ROUNDS_AFTER * 2,
+         FORKSKINNY_128_256_ROUNDS_BEFORE +
+         FORKSKINNY_128_256_ROUNDS_AFTER);
+
+    /* Remove the branching constant */
+    state.S[0] ^= 0x08040201U;
+    state.S[1] ^= 0x82412010U;
+    state.S[2] ^= 0x28140a05U;
+    state.S[3] ^= 0x8844a251U;
+
+    /* Roll the tweakey back another "after" rounds */
+    forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER);
+
+    /* Save the state and the tweakey at the forking point */
+    fstate = state;
+
+    /* Generate the left output block after another "before" rounds */
+    forkskinny_128_256_inv_rounds
+        (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0);
+    le_store_word32(output_left, state.S[0]);
+    le_store_word32(output_left + 4, state.S[1]);
+    le_store_word32(output_left + 8, state.S[2]);
+    le_store_word32(output_left + 12, state.S[3]);
+
+    /* Generate the right output block by going forward "after"
+     * rounds from the forking point */
+    forkskinny_128_256_rounds
+        (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE,
+         FORKSKINNY_128_256_ROUNDS_BEFORE +
+         FORKSKINNY_128_256_ROUNDS_AFTER);
+    le_store_word32(output_right, fstate.S[0]);
+    le_store_word32(output_right + 4, fstate.S[1]);
+    le_store_word32(output_right + 8, fstate.S[2]);
+    le_store_word32(output_right + 12, fstate.S[3]);
+}
+
+/**
+ * \brief Number of rounds of ForkSkinny-128-384 before forking.
+ */
+#define FORKSKINNY_128_384_ROUNDS_BEFORE 25
+
+/**
+ * \brief Number of rounds of ForkSkinny-128-384 after forking.
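+ *
+ * \note One branch of ForkSkinny-128-384 therefore runs
+ * 25 + 31 = 56 rounds in total, and producing both output blocks
+ * advances the key schedule through 25 + 2 * 31 = 87 rounds.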
+ */
+#define FORKSKINNY_128_384_ROUNDS_AFTER 31
+
+void forkskinny_128_384_encrypt
+    (const unsigned char key[48], unsigned char *output_left,
+     unsigned char *output_right, const unsigned char *input)
+{
+    forkskinny_128_384_state_t state;
+
+    /* Unpack the tweakey and the input */
+    state.TK1[0] = le_load_word32(key);
+    state.TK1[1] = le_load_word32(key + 4);
+    state.TK1[2] = le_load_word32(key + 8);
+    state.TK1[3] = le_load_word32(key + 12);
+    state.TK2[0] = le_load_word32(key + 16);
+    state.TK2[1] = le_load_word32(key + 20);
+    state.TK2[2] = le_load_word32(key + 24);
+    state.TK2[3] = le_load_word32(key + 28);
+    state.TK3[0] = le_load_word32(key + 32);
+    state.TK3[1] = le_load_word32(key + 36);
+    state.TK3[2] = le_load_word32(key + 40);
+    state.TK3[3] = le_load_word32(key + 44);
+    state.S[0] = le_load_word32(input);
+    state.S[1] = le_load_word32(input + 4);
+    state.S[2] = le_load_word32(input + 8);
+    state.S[3] = le_load_word32(input + 12);
+
+    /* Run all of the rounds before the forking point */
+    forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE);
+
+    /* Determine which output blocks we need */
+    if (output_left && output_right) {
+        /* We need both outputs so save the state at the forking point */
+        uint32_t F[4];
+        F[0] = state.S[0];
+        F[1] = state.S[1];
+        F[2] = state.S[2];
+        F[3] = state.S[3];
+
+        /* Generate the right output block */
+        forkskinny_128_384_rounds
+            (&state, FORKSKINNY_128_384_ROUNDS_BEFORE,
+             FORKSKINNY_128_384_ROUNDS_BEFORE +
+             FORKSKINNY_128_384_ROUNDS_AFTER);
+        le_store_word32(output_right, state.S[0]);
+        le_store_word32(output_right + 4, state.S[1]);
+        le_store_word32(output_right + 8, state.S[2]);
+        le_store_word32(output_right + 12, state.S[3]);
+
+        /* Restore the state at the forking point */
+        state.S[0] = F[0];
+        state.S[1] = F[1];
+        state.S[2] = F[2];
+        state.S[3] = F[3];
+    }
+    if (output_left) {
+        /* Generate the left output block */
+        state.S[0] ^= 0x08040201U; /* Branching constant */
+        state.S[1] ^= 0x82412010U;
+        state.S[2] ^= 0x28140a05U;
+        state.S[3] ^= 0x8844a251U;
+        forkskinny_128_384_rounds
+            (&state, FORKSKINNY_128_384_ROUNDS_BEFORE +
+                     FORKSKINNY_128_384_ROUNDS_AFTER,
+             FORKSKINNY_128_384_ROUNDS_BEFORE +
+             FORKSKINNY_128_384_ROUNDS_AFTER * 2);
+        le_store_word32(output_left, state.S[0]);
+        le_store_word32(output_left + 4, state.S[1]);
+        le_store_word32(output_left + 8, state.S[2]);
+        le_store_word32(output_left + 12, state.S[3]);
+    } else {
+        /* We only need the right output block */
+        forkskinny_128_384_rounds
+            (&state, FORKSKINNY_128_384_ROUNDS_BEFORE,
+             FORKSKINNY_128_384_ROUNDS_BEFORE +
+             FORKSKINNY_128_384_ROUNDS_AFTER);
+        le_store_word32(output_right, state.S[0]);
+        le_store_word32(output_right + 4, state.S[1]);
+        le_store_word32(output_right + 8, state.S[2]);
+        le_store_word32(output_right + 12, state.S[3]);
+    }
+}
+
+void forkskinny_128_384_decrypt
+    (const unsigned char key[48], unsigned char *output_left,
+     unsigned char *output_right, const unsigned char *input)
+{
+    forkskinny_128_384_state_t state;
+    forkskinny_128_384_state_t fstate;
+
+    /* Unpack the tweakey and the input */
+    state.TK1[0] = le_load_word32(key);
+    state.TK1[1] = le_load_word32(key + 4);
+    state.TK1[2] = le_load_word32(key + 8);
+    state.TK1[3] = le_load_word32(key + 12);
+    state.TK2[0] = le_load_word32(key + 16);
+    state.TK2[1] = le_load_word32(key + 20);
+    state.TK2[2] = le_load_word32(key + 24);
+    state.TK2[3] = le_load_word32(key + 28);
+    state.TK3[0] = le_load_word32(key + 32);
+    state.TK3[1] = le_load_word32(key + 36);
+    state.TK3[2] = le_load_word32(key + 40);
+    state.TK3[3] = le_load_word32(key + 44);
+    state.S[0] = le_load_word32(input);
+    state.S[1] = le_load_word32(input + 4);
+    state.S[2] = le_load_word32(input + 8);
+    state.S[3] = le_load_word32(input + 12);
+
+    /* Fast-forward the tweakey to the end of the key schedule */
+    forkskinny_128_384_forward_tk
+        (&state, FORKSKINNY_128_384_ROUNDS_BEFORE +
+                 FORKSKINNY_128_384_ROUNDS_AFTER * 2);
+
+    /* Perform the "after" rounds on the input to get back
+     * to the forking point in the cipher */
+    forkskinny_128_384_inv_rounds
+        (&state, FORKSKINNY_128_384_ROUNDS_BEFORE +
+                 FORKSKINNY_128_384_ROUNDS_AFTER * 2,
+         FORKSKINNY_128_384_ROUNDS_BEFORE +
+         FORKSKINNY_128_384_ROUNDS_AFTER);
+
+    /* Remove the branching constant */
+    state.S[0] ^= 0x08040201U;
+    state.S[1] ^= 0x82412010U;
+    state.S[2] ^= 0x28140a05U;
+    state.S[3] ^= 0x8844a251U;
+
+    /* Roll the tweakey back another "after" rounds */
+    forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER);
+
+    /* Save the state and the tweakey at the forking point */
+    fstate = state;
+
+    /* Generate the left output block after another "before" rounds */
+    forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0);
+    le_store_word32(output_left, state.S[0]);
+    le_store_word32(output_left + 4, state.S[1]);
+    le_store_word32(output_left + 8, state.S[2]);
+    le_store_word32(output_left + 12, state.S[3]);
+
+    /* Generate the right output block by going forward "after"
+     * rounds from the forking point */
+    forkskinny_128_384_rounds
+        (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE,
+         FORKSKINNY_128_384_ROUNDS_BEFORE +
+         FORKSKINNY_128_384_ROUNDS_AFTER);
+    le_store_word32(output_right, fstate.S[0]);
+    le_store_word32(output_right + 4, fstate.S[1]);
+    le_store_word32(output_right + 8, fstate.S[2]);
+    le_store_word32(output_right + 12, fstate.S[3]);
+}
+
+/**
+ * \brief Number of rounds of ForkSkinny-64-192 before forking.
+ */
+#define FORKSKINNY_64_192_ROUNDS_BEFORE 17
+
+/**
+ * \brief Number of rounds of ForkSkinny-64-192 after forking.
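+ *
+ * \note Unlike the 128-bit block variants above, which operate on
+ * little-endian 32-bit words, ForkSkinny-64-192 holds its 64-bit block
+ * as four 16-bit row words that are loaded and stored big-endian.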
+ */
+#define FORKSKINNY_64_192_ROUNDS_AFTER 23
+
+void forkskinny_64_192_encrypt
+    (const unsigned char key[24], unsigned char *output_left,
+     unsigned char *output_right, const unsigned char *input)
+{
+    forkskinny_64_192_state_t state;
+
+    /* Unpack the tweakey and the input */
+    state.TK1[0] = be_load_word16(key);
+    state.TK1[1] = be_load_word16(key + 2);
+    state.TK1[2] = be_load_word16(key + 4);
+    state.TK1[3] = be_load_word16(key + 6);
+    state.TK2[0] = be_load_word16(key + 8);
+    state.TK2[1] = be_load_word16(key + 10);
+    state.TK2[2] = be_load_word16(key + 12);
+    state.TK2[3] = be_load_word16(key + 14);
+    state.TK3[0] = be_load_word16(key + 16);
+    state.TK3[1] = be_load_word16(key + 18);
+    state.TK3[2] = be_load_word16(key + 20);
+    state.TK3[3] = be_load_word16(key + 22);
+    state.S[0] = be_load_word16(input);
+    state.S[1] = be_load_word16(input + 2);
+    state.S[2] = be_load_word16(input + 4);
+    state.S[3] = be_load_word16(input + 6);
+
+    /* Run all of the rounds before the forking point */
+    forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE);
+
+    /* Determine which output blocks we need */
+    if (output_left && output_right) {
+        /* We need both outputs so save the state at the forking point */
+        uint16_t F[4];
+        F[0] = state.S[0];
+        F[1] = state.S[1];
+        F[2] = state.S[2];
+        F[3] = state.S[3];
+
+        /* Generate the right output block */
+        forkskinny_64_192_rounds
+            (&state, FORKSKINNY_64_192_ROUNDS_BEFORE,
+             FORKSKINNY_64_192_ROUNDS_BEFORE +
+             FORKSKINNY_64_192_ROUNDS_AFTER);
+        be_store_word16(output_right, state.S[0]);
+        be_store_word16(output_right + 2, state.S[1]);
+        be_store_word16(output_right + 4, state.S[2]);
+        be_store_word16(output_right + 6, state.S[3]);
+
+        /* Restore the state at the forking point */
+        state.S[0] = F[0];
+        state.S[1] = F[1];
+        state.S[2] = F[2];
+        state.S[3] = F[3];
+    }
+    if (output_left) {
+        /* Generate the left output block */
+        state.S[0] ^= 0x1249U; /* Branching constant */
+        state.S[1] ^= 0x36daU;
+        state.S[2] ^= 0x5b7fU;
+        state.S[3] ^= 0xec81U;
+        forkskinny_64_192_rounds
+            (&state, FORKSKINNY_64_192_ROUNDS_BEFORE +
+                     FORKSKINNY_64_192_ROUNDS_AFTER,
+             FORKSKINNY_64_192_ROUNDS_BEFORE +
+             FORKSKINNY_64_192_ROUNDS_AFTER * 2);
+        be_store_word16(output_left, state.S[0]);
+        be_store_word16(output_left + 2, state.S[1]);
+        be_store_word16(output_left + 4, state.S[2]);
+        be_store_word16(output_left + 6, state.S[3]);
+    } else {
+        /* We only need the right output block */
+        forkskinny_64_192_rounds
+            (&state, FORKSKINNY_64_192_ROUNDS_BEFORE,
+             FORKSKINNY_64_192_ROUNDS_BEFORE +
+             FORKSKINNY_64_192_ROUNDS_AFTER);
+        be_store_word16(output_right, state.S[0]);
+        be_store_word16(output_right + 2, state.S[1]);
+        be_store_word16(output_right + 4, state.S[2]);
+        be_store_word16(output_right + 6, state.S[3]);
+    }
+}
+
+void forkskinny_64_192_decrypt
+    (const unsigned char key[24], unsigned char *output_left,
+     unsigned char *output_right, const unsigned char *input)
+{
+    forkskinny_64_192_state_t state;
+    forkskinny_64_192_state_t fstate;
+
+    /* Unpack the tweakey and the input */
+    state.TK1[0] = be_load_word16(key);
+    state.TK1[1] = be_load_word16(key + 2);
+    state.TK1[2] = be_load_word16(key + 4);
+    state.TK1[3] = be_load_word16(key + 6);
+    state.TK2[0] = be_load_word16(key + 8);
+    state.TK2[1] = be_load_word16(key + 10);
+    state.TK2[2] = be_load_word16(key + 12);
+    state.TK2[3] = be_load_word16(key + 14);
+    state.TK3[0] = be_load_word16(key + 16);
+    state.TK3[1] = be_load_word16(key + 18);
+    state.TK3[2] = be_load_word16(key + 20);
+    state.TK3[3] =
be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + 
.byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 
200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + 
.byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + 
.byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 
45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 
40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + 
.byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + 
.byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + 
ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 
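+; NOTE: the plain "lpm" fallback used on older cores always loads into r0,
+; hence the extra "mov" to copy the table byte into the working register.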
+#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push 
r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + 
std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + 
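+
+/* Each #if ladder above and below is one byte-wide table lookup,
+ * r = table[r]: ELPM through RAMPZ:Z on parts with more than 64K of
+ * flash, the single-register LPM form where __AVR_HAVE_LPMX__ says it
+ * exists, a plain LD on AVRTINY parts (which map flash into the data
+ * space), and the classic LPM-into-r0 pair otherwise.  The active
+ * table here is table_1, evidently the inverse of the S-box in
+ * table_0 used by the forward rounds. */
+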
mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + 
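+
+/* forkskinny_128_256_forward_tk: fast-forwards the tweakey schedule by
+ * the round count passed in r22.  TK1 stays in registers and TK2 in
+ * the frame at Y+1..Y+16; every pass of the "51:" loop applies the
+ * SKINNY tweakey permutation to both, additionally pushing the freshly
+ * permuted top half of TK2 through table_2 (evidently the 8-bit TK2
+ * LFSR).  A rough C model of one permutation step on one 16-byte
+ * array, with illustrative names (PT, old and tk are not identifiers
+ * in this code):
+ *
+ *   static const unsigned char PT[8] = {9, 15, 8, 13, 10, 14, 12, 11};
+ *   unsigned char old[16];
+ *   unsigned i;
+ *   memcpy(old, tk, 16);
+ *   for (i = 0; i < 8; ++i)
+ *       tk[i] = old[PT[i]];      -- rows 2-3 move up into rows 0-1
+ *   memcpy(tk + 8, old, 8);      -- rows 0-1 move down unchanged
+ */
+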
std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + 
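+
+/* forkskinny_128_256_reverse_tk below steps the same schedule
+ * backwards: the halves swap back, the bytes returning to rows 2-3 go
+ * through the inverse permutation {2, 0, 4, 7, 6, 3, 5, 1}, and on
+ * the TK2 side they also pass through table_3, which pairs with
+ * table_2 as its inverse. */
+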
pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 
5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif 
defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + 
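+
+/* Round-constant injection: table_4 evidently stores two bytes per
+ * round, which is why r22 runs at twice the round number and is
+ * bumped twice per pass.  The first byte is XORed into row 0 and the
+ * second into row 1, followed by a fixed 0x02 into the first byte of
+ * row 2 and the third byte of row 0. */
+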
elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop 
r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + 
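+
+/* forkskinny_128_384_inv_rounds mirrors the forward loop above with
+ * every step reversed.  Its frame holds all 48 tweakey bytes (TK1 at
+ * Y+1, TK2 at Y+17, TK3 at Y+33) plus the doubled stop round at Y+49,
+ * and the schedule runs backwards: TK2 cells step through table_3 and
+ * TK3 cells through table_2, the two LFSR tables trading places
+ * because each is evidently the other's inverse. */
+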
mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 
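+
+/* The movw/eor block above is the inverse linear layer.  In terms of
+ * the four 32-bit rows S0..S3 of the C implementation it computes
+ * (variable names illustrative):
+ *
+ *   t = S0; S0 = S1; S1 = S2; S2 = S3; S3 = t;   -- undo row rotation
+ *   S3 ^= S2; S2 ^= S0; S1 ^= S2;                -- undo MixColumns
+ *
+ * and the byte moves around this point rotate rows 1-3 back into
+ * place, undoing ShiftRows. */
+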
+ mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov 
r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 
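+
+/* forkskinny_128_384_forward_tk: one tweakey-permutation step per loop
+ * pass for all three arrays.  TK1 is kept in registers, TK2 and TK3 in
+ * the frame at Y+1..Y+16 and Y+17..Y+32; the permuted top halves of
+ * TK2 and TK3 go through table_2 and table_3 respectively, and r22
+ * simply counts the requested rounds down to zero. */
+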
+ ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + 
elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + 
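+
+/* Standard avr-gcc frame setup: SP is read from SPL/SPH (0x3d/0x3e),
+ * lowered by the frame size, and written back with SREG saved around
+ * the update.  Writing SPH under cli and SPL right after the SREG
+ * restore is safe because the AVR always executes one more instruction
+ * before honouring a freshly re-enabled interrupt. */
+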
cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size 
forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z 
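+
+/* forkskinny_64_192_rounds works on 4-bit cells packed two to a byte:
+ * the 64-bit state sits in r26,r27,r2-r7, and each table_5 lookup
+ * evidently applies the SKINNY-64 S-box to both nibbles of a byte at
+ * once.  The round constants still come from table_4 but are swapped
+ * into the high nibble first; the TK1 permutation is done with masked
+ * swap/and/or sequences instead of tables, while TK2 and TK3 step
+ * through table_7 and table_8, apparently the nibble-pair analogues of
+ * the two byte LFSR tables. */
+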
+#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov 
r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + 
ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + 
ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 
+ pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + 
or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - 
le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right
+         * rotate on the word to rotate the cells in the word left */
+        s1 = rightRotate8(s1);
+        s2 = rightRotate16(s2);
+        s3 = rightRotate24(s3);
+
+        /* XOR the round constant and the subkey for this round */
+        rc = RC[--first];
+        s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000;
+        s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4);
+        s2 ^= 0x02;
+
+        /* Apply the inverse of the S-box to all cells in the state */
+        skinny128_inv_sbox(s0);
+        skinny128_inv_sbox(s1);
+        skinny128_inv_sbox(s2);
+        skinny128_inv_sbox(s3);
+    }
 
     /* Save the local variables back to the state */
     state->S[0] = s0;
@@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round
     state->S[3] = s3;
 }
 
-void forkskinny_128_256_decrypt
-    (const unsigned char key[32], unsigned char *output_left,
-     unsigned char *output_right, const unsigned char *input)
+void forkskinny_128_256_forward_tk
+    (forkskinny_128_256_state_t *state, unsigned rounds)
 {
-    forkskinny_128_256_state_t state;
-    forkskinny_128_256_state_t fstate;
-    unsigned round;
-
-    /* Unpack the tweakey and the input */
-    state.TK1[0] = le_load_word32(key);
-    state.TK1[1] = le_load_word32(key + 4);
-    state.TK1[2] = le_load_word32(key + 8);
-    state.TK1[3] = le_load_word32(key + 12);
-    state.TK2[0] = le_load_word32(key + 16);
-    state.TK2[1] = le_load_word32(key + 20);
-    state.TK2[2] = le_load_word32(key + 24);
-    state.TK2[3] = le_load_word32(key + 28);
-    state.S[0] = le_load_word32(input);
-    state.S[1] = le_load_word32(input + 4);
-    state.S[2] = le_load_word32(input + 8);
-    state.S[3] = le_load_word32(input + 12);
-
-    /* Fast-forward the tweakey to the end of the key schedule */
-    for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE +
-                             FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) {
-        skinny128_permute_tk(state.TK1);
-        skinny128_permute_tk(state.TK2);
-        skinny128_LFSR2(state.TK2[0]);
-        skinny128_LFSR2(state.TK2[1]);
+    unsigned temp;
+
+    /* The tweak permutation repeats every 16 rounds so we can avoid
+     * some skinny128_permute_tk() calls in the early stages.  During
+     * the 16 rounds, the LFSR will be applied 8 times to every word */
+    while (rounds >= 16) {
+        for (temp = 0; temp < 8; ++temp) {
+            skinny128_LFSR2(state->TK2[0]);
+            skinny128_LFSR2(state->TK2[1]);
+            skinny128_LFSR2(state->TK2[2]);
+            skinny128_LFSR2(state->TK2[3]);
+        }
+        rounds -= 16;
     }
 
-    /* Perform the "after" rounds on the input to get back
-     * to the forking point in the cipher */
-    for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE +
-                  FORKSKINNY_128_256_ROUNDS_AFTER * 2);
-         round > (FORKSKINNY_128_256_ROUNDS_BEFORE +
-                  FORKSKINNY_128_256_ROUNDS_AFTER); --round) {
-        forkskinny_128_256_inv_round(&state, round - 1);
+    /* Handle the left-over rounds */
+    while (rounds > 0) {
+        skinny128_permute_tk(state->TK1);
+        skinny128_permute_tk(state->TK2);
+        skinny128_LFSR2(state->TK2[0]);
+        skinny128_LFSR2(state->TK2[1]);
+        --rounds;
     }
+}
 
-    /* Remove the branching constant */
-    state.S[0] ^= 0x08040201U;
-    state.S[1] ^= 0x82412010U;
-    state.S[2] ^= 0x28140a05U;
-    state.S[3] ^= 0x8844a251U;
-
-    /* Roll the tweakey back another "after" rounds */
-    for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) {
-        skinny128_inv_LFSR2(state.TK2[0]);
-        skinny128_inv_LFSR2(state.TK2[1]);
-        skinny128_inv_permute_tk(state.TK1);
-        skinny128_inv_permute_tk(state.TK2);
+void forkskinny_128_256_reverse_tk
+    (forkskinny_128_256_state_t *state, unsigned rounds)
+{
+    unsigned temp;
+
+    /* The tweak permutation repeats every 16 rounds so we can avoid
+     * some skinny128_inv_permute_tk() calls in the early stages.  During
+     * the 16 rounds, the LFSR will be applied 8 times to every word */
+    while (rounds >= 16) {
+        for (temp = 0; temp < 8; ++temp) {
+            skinny128_inv_LFSR2(state->TK2[0]);
+            skinny128_inv_LFSR2(state->TK2[1]);
+            skinny128_inv_LFSR2(state->TK2[2]);
+            skinny128_inv_LFSR2(state->TK2[3]);
+        }
+        rounds -= 16;
    }
 
-    /* Save the state and the tweakey at the forking point */
-    fstate = state;
-
-    /* Generate the left output block after another "before" rounds */
-    for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) {
-        forkskinny_128_256_inv_round(&state, round - 1);
-    }
-    le_store_word32(output_left, state.S[0]);
-    le_store_word32(output_left + 4, state.S[1]);
-    le_store_word32(output_left + 8, state.S[2]);
-    le_store_word32(output_left + 12, state.S[3]);
-
-    /* Generate the right output block by going forward "after"
-     * rounds from the forking point */
-    for (round = FORKSKINNY_128_256_ROUNDS_BEFORE;
-         round < (FORKSKINNY_128_256_ROUNDS_BEFORE +
-                  FORKSKINNY_128_256_ROUNDS_AFTER); ++round) {
-        forkskinny_128_256_round(&fstate, round);
+    /* Handle the left-over rounds */
+    while (rounds > 0) {
+        skinny128_inv_LFSR2(state->TK2[0]);
+        skinny128_inv_LFSR2(state->TK2[1]);
+        skinny128_inv_permute_tk(state->TK1);
+        skinny128_inv_permute_tk(state->TK2);
+        --rounds;
     }
-    le_store_word32(output_right, fstate.S[0]);
-    le_store_word32(output_right + 4, fstate.S[1]);
-    le_store_word32(output_right + 8, fstate.S[2]);
-    le_store_word32(output_right + 12, fstate.S[3]);
 }
 
-/**
- * \brief Number of rounds of ForkSkinny-128-384 before forking.
- */
-#define FORKSKINNY_128_384_ROUNDS_BEFORE 25
-
-/**
- * \brief Number of rounds of ForkSkinny-128-384 after forking.
- */
-#define FORKSKINNY_128_384_ROUNDS_AFTER 31
-
-/**
- * \brief State information for ForkSkinny-128-384.
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
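Aside: the batched forkskinny_64_192_rounds()/forkskinny_64_192_inv_rounds() pair above are exact mirrors over half-open round ranges, and inv_rounds also rolls the tweakey schedule back, so a forward pass followed by the matching inverse pass restores the whole state structure. A minimal round-trip check, assuming only the declarations from internal-forkskinny.h (the helper name and the 0x5a fill pattern are illustrative, not part of the patch):

#include <assert.h>
#include <string.h>
#include "internal-forkskinny.h"

static void forkskinny_64_192_roundtrip_check(void)
{
    forkskinny_64_192_state_t s, saved;

    /* Arbitrary fill for the state and tweakey words */
    memset(&s, 0x5a, sizeof(s));
    saved = s;

    /* Rounds [0, 17) forward, then the same range undone in reverse;
     * both the block state S and TK1/TK2/TK3 should round-trip. */
    forkskinny_64_192_rounds(&s, 0, 17);
    forkskinny_64_192_inv_rounds(&s, 17, 0);

    assert(memcmp(&s, &saved, sizeof(s)) == 0);
}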
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
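Aside: the "repeats every 16 rounds" shortcut used by the forward_tk/reverse_tk functions above rests on two properties of the SKINNY tweakey cell permutation PT = [9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7]: it is a single 16-cycle, so PT^16 is the identity, and it swaps the two halves of the tweakey each round, so every cell spends exactly 8 of the 16 rounds in rows 0-1 where the LFSR is applied. A self-contained sketch that checks both claims (standalone program, not part of the patch):

#include <stdio.h>

int main(void)
{
    /* One key-schedule step: TK'[i] = TK[PT[i]] */
    static const int PT[16] = {9, 15, 8, 13, 10, 14, 12, 11,
                               0, 1, 2, 3, 4, 5, 6, 7};
    int p[16], q[16], hits[16] = {0}, i, r, ok = 1;

    for (i = 0; i < 16; ++i)
        p[i] = i;               /* p[i] = original cell now in slot i */
    for (r = 0; r < 16; ++r) {
        for (i = 0; i < 16; ++i)
            q[i] = p[PT[i]];    /* apply the permutation once */
        for (i = 0; i < 16; ++i)
            p[i] = q[i];
        for (i = 0; i < 8; ++i)
            hits[p[i]]++;       /* rows 0-1 receive the LFSR */
    }
    for (i = 0; i < 16; ++i)
        ok &= (p[i] == i) && (hits[i] == 8);
    printf(ok ? "PT^16 = identity; each cell hit by the LFSR 8 times\n"
              : "mismatch\n");
    return 0;
}

This is exactly why 16 schedule steps can be replaced by 8 straight LFSR applications to every tweakey word, with no permutations at all.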
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
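Aside: in the declarations above, `last` is documented as "last round to apply plus 1", i.e. each (first, last) pair is a half-open range, so consecutive calls compose with no overlap or gap. A minimal illustration using the ForkSkinny-128-256 round counts from the encrypt path, 21 rounds before the fork and 27 after (the helper and its caller-supplied initial state are hypothetical):

#include <assert.h>
#include <string.h>
#include "internal-forkskinny.h"

static void range_composition_demo(const forkskinny_128_256_state_t *init)
{
    forkskinny_128_256_state_t a = *init, b = *init;

    /* Rounds [0, 21) then [21, 48) ... */
    forkskinny_128_256_rounds(&a, 0, 21);
    forkskinny_128_256_rounds(&a, 21, 48);

    /* ... must land in the same place as rounds [0, 48) in one call */
    forkskinny_128_256_rounds(&b, 0, 48);

    assert(memcmp(&a, &b, sizeof(a)) == 0);
}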
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t192n48v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
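Aside: the skinny128_permute_tk_half()/skinny128_inv_permute_tk_half() macros added above compute the same row mixing as the full tweakey permutation but leave out the half swap. Because one full application mixes the bottom half into the top and moves the top half down unchanged, two full applications act on each half independently; that is the identity the half macros exploit. A sketch of that equivalence, assuming the full skinny128_permute_tk(tk) macro from this header operates on a uint32_t[4] (illustrative test, not part of the patch):

#include <assert.h>
#include <string.h>
#include "internal-skinnyutil.h"

static void permute_tk_half_demo(void)
{
    uint32_t full[4] = {0x03020100U, 0x07060504U,
                        0x0b0a0908U, 0x0f0e0d0cU};
    uint32_t half[4];
    memcpy(half, full, sizeof(full));

    /* Two full permutations: each half is mixed once and swapped twice */
    skinny128_permute_tk(full);
    skinny128_permute_tk(full);

    /* Half macros: mix each half in place, with no swap */
    skinny128_permute_tk_half(half[0], half[1]);
    skinny128_permute_tk_half(half[2], half[3]);

    assert(memcmp(full, half, sizeof(full)) == 0);
}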
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
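Aside: note the input/output contract implied by the decrypt code above: forkskinny_128_256_decrypt() takes the *left* output block as its input, inverts the left branch back to the forking point, strips the branching constant, and then recovers both the plaintext (inverse "before" rounds) and the right output block (forward "after" rounds). A consistency sketch of that contract against the encrypt path (illustrative test only):

#include <assert.h>
#include <string.h>
#include "internal-forkskinny.h"

static void fork_contract_check(const unsigned char key[32],
                                const unsigned char pt[16])
{
    unsigned char left[16], right[16], pt2[16], right2[16];

    /* Forward: plaintext -> (left, right) output blocks */
    forkskinny_128_256_encrypt(key, left, right, pt);

    /* Inverse: left block -> (plaintext, right block) again */
    forkskinny_128_256_decrypt(key, pt2, right2, left);

    assert(memcmp(pt2, pt, 16) == 0);
    assert(memcmp(right2, right, 16) == 0);
}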
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + 
.byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 
223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 
84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + 
.byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 
163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + 
.byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 
+ .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + 
.byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + 
ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global 
forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm 
+ mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif 
defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + 
ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 
+ ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd 
r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd 
r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor 
r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + 
ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd 
r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function 
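+; forkskinny_128_384_reverse_tk: rewinds the 48-byte ForkSkinny-128-384
+; tweakey schedule in place, stepping backwards by the round count passed
+; in r22.  TK1 stays in r2-r17 while the TK2/TK3 rows live in the stack
+; frame; each loop iteration undoes the per-round cell permutation and
+; steps the stack-resident rows through the LFSR lookup tables.
+; (Descriptive comment added here; the behaviour is inferred from the
+; code below and from forkskinny_128_256_reverse_tk above.)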
+forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std 
Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or 
r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + 
dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw 
r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + 
std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push 
r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
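From this point the patch rewrites internal-forkskinny.c, replacing the private one-round helpers with round-range functions and guarding the C bodies with #if !defined(__AVR__) so that the assembly above supplies the same entry points on AVR. The state types and prototypes therefore have to be shared between the two back ends, presumably via internal-forkskinny.h; a sketch of the declarations for the 128-256 variant, inferred from the call sites rather than quoted from the patch:

    #include <stdint.h>

    typedef struct
    {
        uint32_t TK1[4];    /* First part of the tweakey */
        uint32_t TK2[4];    /* Second part of the tweakey */
        uint32_t S[4];      /* Current block state */
    } forkskinny_128_256_state_t;

    /* Run rounds first..last-1 forwards, or first-1..last backwards */
    void forkskinny_128_256_rounds
        (forkskinny_128_256_state_t *state, unsigned first, unsigned last);
    void forkskinny_128_256_inv_rounds
        (forkskinny_128_256_state_t *state, unsigned first, unsigned last);

    /* Advance or rewind only the tweakey schedule by the given rounds */
    void forkskinny_128_256_forward_tk
        (forkskinny_128_256_state_t *state, unsigned rounds);
    void forkskinny_128_256_reverse_tk
        (forkskinny_128_256_state_t *state, unsigned rounds);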
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
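A caution on the bounds convention in the inverse direction: first is the exclusive upper bound and last the inclusive lower bound, and the remainder of the loop body below indexes the round constants with RC[--first], so rounds are undone from first - 1 down to last. A round-trip sketch under the 128-256 round counts (roundtrip_example is a hypothetical wrapper, not part of the patch):

    /* Applying rounds 0..20 and then inverting with swapped bounds
     * restores both the state and the tweakey schedule. */
    static void roundtrip_example(forkskinny_128_256_state_t *state)
    {
        forkskinny_128_256_rounds(state, 0, 21);      /* rounds 0..20 */
        forkskinny_128_256_inv_rounds(state, 21, 0);  /* undo 20 down to 0 */
    }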
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
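The 16-round fast path in forkskinny_128_256_forward_tk/reverse_tk above rests on two facts about the SKINNY tweakey cell permutation PT: it has order 16, and it exchanges the top and bottom halves of the cell array every round, so each cell spends exactly 8 of every 16 rounds in the two rows that receive the LFSR. A standalone check of both facts, with PT taken from the SKINNY specification rather than from this patch:

    #include <assert.h>

    /* PT as in the SKINNY spec: after a round, new_cell[i] = old_cell[PT[i]] */
    static const int PT[16] = {9, 15, 8, 13, 10, 14, 12, 11,
                               0, 1, 2, 3, 4, 5, 6, 7};

    int main(void)
    {
        int cell[16], next[16], lfsr_hits[16] = {0};
        int i, round;
        for (i = 0; i < 16; ++i)
            cell[i] = i;
        for (round = 0; round < 16; ++round) {
            for (i = 0; i < 16; ++i)
                next[i] = cell[PT[i]];
            for (i = 0; i < 16; ++i)
                cell[i] = next[i];
            for (i = 0; i < 8; ++i)
                ++lfsr_hits[cell[i]];   /* cells now in rows 0-1 get the LFSR */
        }
        for (i = 0; i < 16; ++i) {
            assert(cell[i] == i);       /* PT has order 16 */
            assert(lfsr_hits[i] == 8);  /* every cell was hit exactly 8 times */
        }
        return 0;
    }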
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
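The XOR shuffle that follows the row rotations below is SKINNY's MixColumns written in place over the four row words; expanded, it computes s0' = s0 ^ s2 ^ s3, s1' = s0, s2' = s1 ^ s2, s3' = s0 ^ s2. A quick equivalence check of the in-place form against that expansion (a hypothetical test harness, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    /* The same instruction sequence as the loop body in this patch */
    static void mix_columns_inplace(uint32_t s[4])
    {
        uint32_t temp;
        s[1] ^= s[2];
        s[2] ^= s[0];
        temp = s[3] ^ s[2];
        s[3] = s[2];
        s[2] = s[1];
        s[1] = s[0];
        s[0] = temp;
    }

    int main(void)
    {
        uint32_t s[4] = {0x01234567U, 0x89abcdefU, 0xfedcba98U, 0x76543210U};
        const uint32_t expect[4] = {
            s[0] ^ s[2] ^ s[3], s[0], s[1] ^ s[2], s[0] ^ s[2]
        };
        mix_columns_inplace(s);
        assert(s[0] == expect[0] && s[1] == expect[1]);
        assert(s[2] == expect[2] && s[3] == expect[3]);
        return 0;
    }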
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
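The branching-constant words XORed in at the forking point (0x08040201U, 0x82412010U, 0x28140a05U, 0x8844a251U) are not arbitrary: read byte by byte in little-endian order they are sixteen consecutive states of the same 8-bit LFSR that SKINNY applies to TK2 cells, starting from 0x01. A self-check of that derivation, which is an assumption based on the ForkSkinny specification rather than code from this patch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        static const uint32_t bc[4] = {
            0x08040201U, 0x82412010U, 0x28140a05U, 0x8844a251U
        };
        uint8_t b = 0x01;
        int i, j;
        for (i = 0; i < 4; ++i) {
            uint32_t word = 0;
            for (j = 0; j < 4; ++j) {
                word |= ((uint32_t)b) << (8 * j);   /* little-endian packing */
                /* TK2-style LFSR step: x7..x0 -> x6..x0 || (x7 ^ x5) */
                b = (uint8_t)(((b << 1) & 0xFE) ^
                              (((b >> 7) ^ (b >> 5)) & 0x01));
            }
            assert(word == bc[i]);
        }
        return 0;
    }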
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
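reverse_tk undoes forward_tk block for block: eight inverse LFSR steps per word cancel the eight forward steps of one 16-round block. A self-contained round-trip check, with the byte-sliced TK2 LFSR and its inverse restated from the SKINNY definition (assumed equivalent to the macros in internal-skinnyutil.h):

    #include <stdio.h>
    #include <stdint.h>

    /* Byte-sliced TK2 LFSR: each byte (x7..x0) -> (x6..x0, x7^x5), and its inverse. */
    #define LFSR2(x)     ((((x) << 1) & 0xFEFEFEFEU) ^ ((((x) >> 7) ^ ((x) >> 5)) & 0x01010101U))
    #define INV_LFSR2(x) ((((x) >> 1) & 0x7F7F7F7FU) ^ ((((x) << 7) ^ ((x) << 1)) & 0x80808080U))

    int main(void)
    {
        uint32_t x = 0x12345678U, y = x;
        int i;
        for (i = 0; i < 8; ++i)
            y = LFSR2(y);       /* 8 forward steps: one 16-round block of forward_tk */
        for (i = 0; i < 8; ++i)
            y = INV_LFSR2(y);   /* 8 inverse steps: the matching block of reverse_tk */
        printf("%s\n", y == x ? "tweakey round-trip ok" : "mismatch");
        return 0;
    }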
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
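The nibble-ordering note means that cell 0 of a ForkSkinny-64 row occupies the most significant nibble of the 16-bit word, matching the big-endian byte loads used for this variant. A small sketch of what that layout implies (get_cell and set_cell are illustrative helpers, not part of the library):

    #include <stdio.h>
    #include <stdint.h>

    /* Big-endian nibble order: cell j of a row sits 12 - 4*j bits up,
     * so cell 0 is the most significant nibble. */
    static unsigned get_cell(uint16_t row, unsigned j)
    {
        return (row >> (12 - 4 * j)) & 0x0FU;
    }

    static uint16_t set_cell(uint16_t row, unsigned j, unsigned v)
    {
        unsigned shift = 12 - 4 * j;
        return (uint16_t)((row & ~(0x0FU << shift)) | ((v & 0x0FU) << shift));
    }

    int main(void)
    {
        uint16_t row = 0x1234;  /* cells 0..3 hold 1, 2, 3, 4 */
        printf("cell0=%x cell3=%x\n", get_cell(row, 0), get_cell(row, 3));
        printf("updated row=%04x\n", set_cell(row, 0, 0xA)); /* a234 */
        return 0;
    }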
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
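Both the old and the refactored encrypt bodies keep the same NULL-pointer convention: the caller passes NULL for whichever output block it does not need, and that branch of the fork is simply never computed. A usage sketch under that convention (the all-zero key and plaintext are hypothetical values for illustration only):

    #include <stddef.h>
    #include "internal-forkskinny.h"

    void demo_branches(void)
    {
        unsigned char key[24] = {0};   /* TK1 || TK2 || TK3 */
        unsigned char pt[8] = {0};
        unsigned char left[8], right[8];

        forkskinny_64_192_encrypt(key, left, right, pt);  /* both branches */
        forkskinny_64_192_encrypt(key, NULL, right, pt);  /* right branch only */
        forkskinny_64_192_encrypt(key, left, NULL, pt);   /* left branch only */
    }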
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
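For the ForkSkinny-64-192 decrypt path this fast-forward spans 17 + 2*23 = 63 rounds, so the permutation-free loop covers three full 16-round blocks and the generic tail handles the remaining 15. A one-liner to sanity-check the split:

    #include <stdio.h>

    int main(void)
    {
        /* BEFORE = 17, AFTER = 23, from the ForkSkinny-64-192 defines */
        unsigned rounds = 17 + 23 * 2;
        printf("total = %u: %u fast 16-round blocks + %u leftover rounds\n",
               rounds, rounds / 16, rounds % 16); /* 63: 3 blocks + 15 leftover */
        return 0;
    }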
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
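One consequence of the (first, last) convention documented here is that round ranges compose: because the block state and the tweakey schedule advance together, running two adjacent ranges back to back is the same as running the union. A sketch against the declared API (the literals 21 and 27 are the BEFORE/AFTER counts defined in forkae.c, not in this header):

    #include "internal-forkskinny.h"

    /* [0,21) followed by [21,48) is equivalent to [0,48). */
    void demo_ranges(forkskinny_128_256_state_t *s)
    {
        forkskinny_128_256_rounds(s, 0, 21);        /* rounds 0..20, up to the fork */
        forkskinny_128_256_rounds(s, 21, 21 + 27);  /* rounds 21..47, the right branch */
    }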
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
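As the decrypt body reads, its input is the *left* ciphertext block: it inverts the left branch back to the forking point and then reconstructs both the plaintext (via output_left) and the right ciphertext block. A round-trip sketch of that relationship, with a hypothetical all-zero key and plaintext:

    #include <stdio.h>
    #include <string.h>
    #include "internal-forkskinny.h"

    int main(void)
    {
        unsigned char key[32] = {0}, pt[16] = {0};
        unsigned char left[16], right[16], m[16], right2[16];

        forkskinny_128_256_encrypt(key, left, right, pt);
        forkskinny_128_256_decrypt(key, m, right2, left); /* feed the left block */
        printf("plaintext %s, right block %s\n",
               memcmp(m, pt, 16) ? "mismatch" : "ok",
               memcmp(right2, right, 16) ? "mismatch" : "ok");
        return 0;
    }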
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t256n112v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
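The new skinny128_permute_tk_half / skinny128_inv_permute_tk_half macros look like a two-rounds-at-a-time optimisation: since PT exchanges the two 8-cell halves of the tweakey array every round, applying it twice permutes each half in place, which is presumably the "no swap" the comments refer to. A sketch computing the composed half-permutation (same PT table as in the earlier order check):

    #include <stdio.h>

    static const unsigned char PT[16] = {
        9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7
    };

    int main(void)
    {
        unsigned i;
        /* For i < 8, PT[i] lands in the top half, so PT[PT[i]] is back in
         * the bottom half: the two-round permutation of one half in place. */
        for (i = 0; i < 8; ++i)
            printf("%u ", PT[PT[i]]);
        printf("\n");   /* expected: 1 7 0 5 2 6 4 3 */
        return 0;
    }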
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + 
.byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 
223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 
84
+ .byte 86, 88, 90, 92, 94
+ .byte 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126
+ .byte 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
+ .byte 161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191
+ .byte 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222
+ .byte 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254
+
+ .section .progmem.data,"a",@progbits
+ .p2align 8
+ .type table_3, @object
+ .size table_3, 256
+; table_3 is the inverse of the byte LFSR in table_2 (and doubles as the
+; forward TK3 LFSR): table_3[x] = (x >> 1) | (((x ^ (x >> 6)) & 1) << 7).
+table_3:
+ .byte 0, 128, 1, 129, 2, 130, 3, 131, 4, 132, 5, 133, 6, 134, 7, 135
+ .byte 8, 136, 9, 137, 10, 138, 11, 139, 12, 140, 13, 141, 14, 142, 15, 143
+ .byte 16, 144, 17, 145, 18, 146, 19, 147, 20, 148, 21, 149, 22, 150, 23, 151
+ .byte 24, 152, 25, 153, 26, 154, 27, 155, 28, 156, 29, 157, 30, 158, 31, 159
+ .byte 160, 32, 161, 33, 162, 34, 163, 35, 164, 36, 165, 37, 166, 38, 167, 39
+ .byte 168, 40, 169, 41, 170, 42, 171, 43, 172, 44, 173, 45, 174, 46, 175, 47
+ .byte 176, 48, 177, 49, 178, 50, 179, 51, 180, 52, 181, 53, 182, 54, 183, 55
+ .byte 184, 56, 185, 57, 186, 58, 187, 59, 188, 60, 189, 61, 190, 62, 191, 63
+ .byte 64, 192, 65, 193, 66, 194, 67, 195, 68, 196, 69, 197, 70, 198, 71, 199
+ .byte 72, 200, 73, 201, 74, 202, 75, 203, 76, 204, 77, 205, 78, 206, 79, 207
+ .byte 80, 208, 81, 209, 82, 210, 83, 211, 84, 212, 85, 213, 86, 214, 87, 215
+ .byte 88, 216, 89, 217, 90, 218, 91, 219, 92, 220, 93, 221, 94, 222, 95, 223
+ .byte 224, 96, 225, 97, 226, 98, 227, 99, 228, 100, 229, 101, 230, 102, 231, 103
+ .byte 232, 104, 233, 105, 234, 106, 235, 107, 236, 108, 237, 109, 238, 110, 239, 111
+ .byte 240, 112, 241, 113, 242, 114, 243, 115, 244, 116, 245, 117, 246, 118, 247, 119
+ .byte 248, 120, 249, 121, 250, 122, 251, 123, 252, 124, 253, 125, 254, 126, 255, 127
+
+ .section .progmem.data,"a",@progbits
+ .p2align 8
+ .type table_4, @object
+ .size table_4, 174
+; table_4 holds the 87 round constants as (rc & 0x0f, rc >> 4) pairs,
+; generated by the 7-bit LFSR rc' = ((rc << 1) & 0x7f) | (1 ^ rc6 ^ rc5).
+table_4:
+ .byte 1, 0, 3, 0, 7, 0, 15, 0, 15, 1, 15, 3, 14, 7, 13, 7
+ .byte 11, 7, 7, 7, 15, 6, 15, 5, 14, 3, 12, 7, 9, 7, 3, 7
+ .byte 7, 6, 15, 4, 14, 1, 13, 3, 10, 7, 5, 7, 11, 6, 7, 5
+ .byte 14, 2, 12, 5, 8, 3, 0, 7, 1, 6, 3, 4, 6, 0, 13, 0
+ .byte 11, 1, 7, 3, 14, 6, 13, 5, 10, 3, 4, 7, 9, 6, 3, 5
+ .byte 6, 2, 12, 4, 8, 1, 1, 3, 2, 6, 5, 4, 10, 0, 5, 1
+ .byte 11, 2, 6, 5, 12, 2, 8, 5, 0, 3, 0, 6, 1, 4, 2, 0
+ .byte 5, 0, 11, 0, 7, 1, 15, 2, 14, 5, 12, 3, 8, 7, 1, 7
+ .byte 3, 6, 7, 4, 14, 0, 13, 1, 11, 3, 6, 7, 13, 6, 11, 5
+ .byte 6, 3, 12, 6, 9, 5, 2, 3, 4, 6, 9, 4, 2, 1, 5, 2
+ .byte 10, 4, 4, 1, 9, 2, 2, 5, 4, 2, 8, 4, 0, 1
+
+ .section .progmem.data,"a",@progbits
+ .p2align 8
+ .type table_5, @object
+ .size table_5, 256
+; table_5 applies the SKINNY 4-bit S-box to both nibbles of the index
+; byte at once, for the 64-bit ForkSkinny data path.
+table_5:
+ .byte 204, 198, 201, 192, 193, 202, 194, 203, 195, 200, 197, 205, 196, 206, 199, 207
+ .byte 108, 102, 105, 96, 97, 106, 98, 107, 99, 104, 101, 109, 100, 110, 103, 111
+ .byte 156, 150, 153, 144, 145, 154, 146, 155, 147, 152, 149, 157, 148, 158, 151, 159
+ .byte 12, 6, 9, 0, 1, 10, 2, 11, 3, 8, 5, 13, 4, 14, 7, 15
+ .byte 28, 22, 25, 16, 17, 26, 18, 27, 19, 24, 21, 29, 20, 30, 23, 31
+ .byte 172, 166, 169, 160, 161, 170, 162, 171, 163, 168, 165, 173, 164, 174, 167, 175
+ .byte 44, 38, 41, 32, 33, 42, 34, 43, 35, 40, 37, 45, 36, 46, 39, 47
+ .byte 188, 182, 185, 176, 177, 186, 178, 187, 179, 184, 181, 189, 180, 190, 183, 191
+ .byte 60, 54, 57, 48, 49, 58, 50, 59, 51, 56, 53, 61, 52, 62, 55, 63
+ .byte 140, 134, 137, 128, 129, 138, 130, 139, 131, 136, 133, 141, 132, 142, 135, 143
+ .byte 92, 86, 89, 80, 81, 90, 82, 91, 83, 88, 85, 93, 84, 94, 87, 95
+ .byte 220, 214, 217, 208, 209, 218, 210, 219, 211, 216, 213, 221, 212, 222, 215, 223
+ .byte 76, 70, 73, 64, 65, 74, 66, 75, 67, 72, 69, 77, 68, 78, 71, 79
+ .byte 236, 230, 233, 224, 225, 234, 226, 235, 227, 232, 229, 237, 228, 238, 231, 239
+ .byte 124, 118, 121, 112, 113, 122, 114, 123, 115, 120, 117, 125, 116, 126, 119, 127
+ .byte 252, 246, 249, 240, 241, 250, 242, 251, 243, 248, 245, 253, 244, 254, 247, 255
+
+ .section .progmem.data,"a",@progbits
+ .p2align 8
+ .type table_6, @object
+ .size table_6, 256
+; table_6 is the matching inverse 4-bit S-box, applied to both nibbles.
+table_6:
+ .byte 51, 52, 54, 56, 60, 58, 49, 62, 57, 50, 53, 55, 48, 59, 61, 63
+ .byte 67, 68, 70, 72, 76, 74, 65, 78, 73, 66, 69, 71, 64, 75, 77, 79
+ .byte 99, 100, 102, 104, 108, 106, 97, 110, 105, 98, 101, 103, 96, 107, 109, 111
+ .byte 131, 132, 134, 136, 140, 138, 129, 142, 137, 130, 133, 135, 128, 139, 141, 143
+ .byte 195, 196, 198, 200, 204, 202, 193, 206, 201, 194, 197, 199, 192, 203, 205, 207
+ .byte 163, 164, 166, 168, 172, 170, 161, 174, 169, 162, 165, 167, 160, 171, 173, 175
+ .byte 19, 20, 22, 24, 28, 26, 17, 30, 25, 18, 21, 23, 16, 27, 29, 31
+ .byte 227, 228, 230, 232, 236, 234, 225, 238, 233, 226, 229, 231, 224, 235, 237, 239
+ .byte 147, 148, 150, 152, 156, 154, 145, 158, 153, 146, 149, 151, 144, 155, 157, 159
+ .byte 35, 36, 38, 40, 44, 42, 33, 46, 41, 34, 37, 39, 32, 43, 45, 47
+ .byte 83, 84, 86, 88, 92, 90, 81, 94, 89, 82, 85, 87, 80, 91, 93, 95
+ .byte 115, 116, 118, 120, 124, 122, 113, 126, 121, 114, 117, 119, 112, 123, 125, 127
+ .byte 3, 4, 6, 8, 12, 10, 1, 14, 9, 2, 5, 7, 0, 11, 13, 15
+ .byte 179, 180, 182, 184, 188, 186, 177, 190, 185, 178, 181, 183, 176, 187, 189, 191
+ .byte 211, 212, 214, 216, 220, 218, 209, 222, 217, 210, 213, 215, 208, 219, 221, 223
+ .byte 243, 244, 246, 248, 252, 250, 241, 254, 249, 242, 245, 247, 240, 251, 253, 255
+
+ .section .progmem.data,"a",@progbits
+ .p2align 8
+ .type table_7, @object
+ .size table_7, 256
+; table_7 steps both nibbles of the index byte through the 4-bit TK2 LFSR.
+table_7:
+ .byte 0, 2, 4, 6, 9, 11, 13, 15, 1, 3, 5, 7, 8, 10, 12, 14
+ .byte 32, 34, 36, 38, 41, 43, 45, 47, 33, 35, 37, 39, 40, 42, 44, 46
+ .byte 64, 66, 68, 70, 73, 75, 77, 79, 65, 67, 69, 71, 72, 74, 76, 78
+ .byte 96, 98, 100, 102, 105, 107, 109, 111, 97, 99, 101, 103, 104, 106, 108, 110
+ .byte 144, 146, 148, 150, 153, 155, 157, 159, 145, 147, 149, 151, 152, 154, 156, 158
+ .byte 176, 178, 180, 182, 185, 187, 189, 191, 177, 179, 181, 183, 184, 186, 188, 190
+ .byte 208, 210, 212, 214, 217, 219, 221, 223, 209, 211, 213, 215, 216, 218, 220, 222
+ .byte 240, 242, 244, 246, 249, 251, 253, 255, 241, 243, 245, 247, 248, 250, 252, 254
+ .byte 16, 18, 20, 22, 25, 27, 29, 31, 17, 19, 21, 23, 24, 26, 28, 30
+ .byte 48, 50, 52, 54, 57, 59, 61, 63, 49, 51, 53, 55, 56, 58, 60, 62
+ .byte 80, 82, 84, 86, 89, 91, 93, 95, 81, 83, 85, 87, 88, 90, 92, 94
+ .byte 112, 114, 116, 118, 121, 123, 125, 127, 113, 115, 117, 119, 120, 122, 124, 126
+ .byte 128, 130, 132, 134, 137, 139, 141, 143, 129, 131, 133, 135, 136, 138, 140, 142
+ .byte 160, 162, 164, 166, 169, 171, 173, 175, 161, 163, 165, 167, 168, 170, 172, 174
+ .byte 192, 194, 196, 198, 201, 203, 205, 207, 193, 195, 197, 199, 200, 202, 204, 206
+ .byte 224, 226, 228, 230, 233, 235, 237, 239, 225, 227, 229, 231, 232, 234, 236, 238
+
+ .section .progmem.data,"a",@progbits
+ .p2align 8
+ .type table_8, @object
+ .size table_8, 256
+; table_8 applies the inverse 4-bit LFSR (equivalently, the forward TK3
+; nibble LFSR) to both nibbles.
+table_8:
+ .byte 0, 8, 1, 9, 2, 10, 3, 11, 12, 4, 13, 5, 14, 6, 15, 7
+ .byte 128, 136, 129, 137, 130, 138, 131, 139, 140, 132, 141, 133, 142, 134, 143, 135
+ .byte 16, 24, 17, 25, 18, 26, 19, 27, 28, 20, 29, 21, 30, 22, 31, 23
+ .byte 144, 152, 145, 153, 146, 154, 147, 155, 156, 148, 157, 149, 158, 150, 159, 151
+ .byte 32, 40, 33, 41, 34, 42, 35, 43, 44, 36, 45, 37, 46, 38, 47, 39
+ .byte 160, 168, 161, 169, 162, 170, 163, 171, 172, 164, 173, 165, 174, 166, 175, 167
+ .byte 48, 56, 49, 57, 50, 58, 51, 59, 60, 52, 61, 53, 62, 54, 63, 55
+ .byte 176, 184, 177, 185, 178, 186, 179, 187, 188, 180, 189, 181, 190, 182, 191, 183
+ .byte 192, 200, 193, 201, 194, 202, 195, 203, 204, 196, 205, 197, 206, 198, 207, 199
+ .byte 64, 72, 65, 73, 66, 74, 67, 75, 76, 68, 77, 69, 78, 70, 79, 71
+ .byte 208, 216, 209, 217, 210, 218, 211, 219, 220, 212, 221, 213, 222, 214, 223, 215
+ .byte 80, 88, 81, 89, 82, 90, 83, 91, 92, 84, 93, 85, 94, 86, 95, 87
+ .byte 224, 232, 225, 233, 226, 234, 227, 235, 236, 228, 237, 229, 238, 230, 239, 231
+ .byte 96, 104, 97, 105, 98, 106
.byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + 
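+; The half of TK2 parked at Y+25..Y+32 is stepped through table_2 (the
+; TK2 byte LFSR) and swapped into the active slots at Y+17..Y+24 in
+; tweakey-permuted order.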
ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global 
forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm 
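+; plain-lpm fallback: the loaded byte lands in r0 and is copied out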
+ mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif 
defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + 
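+; forward_tk touches only the tweakey, never the state: TK1 is held in
+; registers and just permuted each iteration, while TK2 (staged into the
+; Y frame below) is additionally stepped through table_2.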
ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 
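+; the evolved TK2 is copied from the Y frame back into bytes 16..31 of
+; the tweakey block before returning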
+ ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd 
r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd 
r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor 
r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + 
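+ ; note: Z holds the state pointer passed in r25:r24. TK1 (the first 16
+ ; bytes of the tweakey) stays in registers throughout this routine, while
+ ; TK2 and TK3 are spilled to the 32-byte stack frame reserved at Y above.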
ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd 
r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function 
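The tweakey-schedule helpers in this file (forkskinny_128_384_forward_tk above and forkskinny_128_384_reverse_tk below) touch only TK1/TK2/TK3, never the block state: one steps the schedule ahead by a number of rounds, the other rewinds it by the same amount. Rewinding reuses the two forward LFSR tables with their roles swapped (table_3 is applied to TK2 and table_2 to TK3), which works because the SKINNY LFSR2 and LFSR3 byte updates are inverse permutations of each other. A minimal round-trip self-check against the C prototypes that this assembly implements; this is a sketch only, assuming the forkskinny_128_384_state_t layout and prototypes from internal-forkskinny.h, with an arbitrary round count of 20:

    /* Sketch: stepping the tweakey schedule forward and then backward by
     * the same number of rounds must restore the original tweakey. */
    #include <assert.h>
    #include <string.h>
    #include "internal-forkskinny.h"

    static void tk_round_trip_check(const forkskinny_128_384_state_t *in)
    {
        forkskinny_128_384_state_t st = *in;
        forkskinny_128_384_forward_tk(&st, 20);  /* arbitrary round count */
        forkskinny_128_384_reverse_tk(&st, 20);
        assert(memcmp(st.TK1, in->TK1, sizeof(st.TK1)) == 0);
        assert(memcmp(st.TK2, in->TK2, sizeof(st.TK2)) == 0);
        assert(memcmp(st.TK3, in->TK3, sizeof(st.TK3)) == 0);
    }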
+forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std 
Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or 
r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + 
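+ ; table_4 stores the round constants, two entries per round; when running
+ ; the rounds backwards the doubled round counter in r22 is pre-decremented
+ ; before each fetch, mirroring the rc = RC[--first] lookup in the C code.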
dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw 
r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + 
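+ ; note: the entire 24-byte ForkSkinny-64-192 tweakey fits in registers, so
+ ; this routine allocates no Y frame; the stack usage of 18 is just the
+ ; pushed call-saved registers.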
std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push 
r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate
-     * on the word to rotate the cells in the word right */
-    s1 = leftRotate8(s1);
-    s2 = leftRotate16(s2);
-    s3 = leftRotate24(s3);
-
-    /* Mix the columns */
-    s1 ^= s2;
-    s2 ^= s0;
-    temp = s3 ^ s2;
-    s3 = s2;
-    s2 = s1;
-    s1 = s0;
-    s0 = temp;
+    /* Perform all requested rounds */
+    for (; first < last; ++first) {
+        /* Apply the S-box to all cells in the state */
+        skinny128_sbox(s0);
+        skinny128_sbox(s1);
+        skinny128_sbox(s2);
+        skinny128_sbox(s3);
+
+        /* XOR the round constant and the subkey for this round */
+        rc = RC[first];
+        s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000;
+        s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4);
+        s2 ^= 0x02;
+
+        /* Shift the cells in the rows right, which moves the cell
+         * values up closer to the MSB. That is, we do a left rotate
+         * on the word to rotate the cells in the word right */
+        s1 = leftRotate8(s1);
+        s2 = leftRotate16(s2);
+        s3 = leftRotate24(s3);
+
+        /* Mix the columns */
+        s1 ^= s2;
+        s2 ^= s0;
+        temp = s3 ^ s2;
+        s3 = s2;
+        s2 = s1;
+        s1 = s0;
+        s0 = temp;
+
+        /* Permute TK1 and TK2 for the next round */
+        skinny128_permute_tk(state->TK1);
+        skinny128_permute_tk(state->TK2);
+        skinny128_LFSR2(state->TK2[0]);
+        skinny128_LFSR2(state->TK2[1]);
+    }

     /* Save the local variables back to the state */
     state->S[0] = s0;
     state->S[1] = s1;
     state->S[2] = s2;
     state->S[3] = s3;
-
-    /* Permute TK1 and TK2 for the next round */
-    skinny128_permute_tk(state->TK1);
-    skinny128_permute_tk(state->TK2);
-    skinny128_LFSR2(state->TK2[0]);
-    skinny128_LFSR2(state->TK2[1]);
-}
-
-void forkskinny_128_256_encrypt
-    (const unsigned char key[32], unsigned char *output_left,
-     unsigned char *output_right, const unsigned char *input)
-{
-    forkskinny_128_256_state_t state;
-    unsigned round;
-
-    /* Unpack the tweakey and the input */
-    state.TK1[0] = le_load_word32(key);
-    state.TK1[1] = le_load_word32(key + 4);
-    state.TK1[2] = le_load_word32(key + 8);
-    state.TK1[3] = le_load_word32(key + 12);
-    state.TK2[0] = le_load_word32(key + 16);
-    state.TK2[1] = le_load_word32(key + 20);
-    state.TK2[2] = le_load_word32(key + 24);
-    state.TK2[3] = le_load_word32(key + 28);
-    state.S[0] = le_load_word32(input);
-    state.S[1] = le_load_word32(input + 4);
-    state.S[2] = le_load_word32(input + 8);
-    state.S[3] = le_load_word32(input + 12);
-
-    /* Run all of the rounds before the forking point */
-    for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) {
-        forkskinny_128_256_round(&state, round);
-    }
-
-    /* Determine which output blocks we need */
-    if (output_left && output_right) {
-        /* We need both outputs so save the state at the forking point */
-        uint32_t F[4];
-        F[0] = state.S[0];
-        F[1] = state.S[1];
-        F[2] = state.S[2];
-        F[3] = state.S[3];
-
-        /* Generate the right output block */
-        for (round = FORKSKINNY_128_256_ROUNDS_BEFORE;
-                round < (FORKSKINNY_128_256_ROUNDS_BEFORE +
-                         FORKSKINNY_128_256_ROUNDS_AFTER); ++round) {
-            forkskinny_128_256_round(&state, round);
-        }
-        le_store_word32(output_right, state.S[0]);
-        le_store_word32(output_right + 4, state.S[1]);
-        le_store_word32(output_right + 8, state.S[2]);
-        le_store_word32(output_right + 12, state.S[3]);
-
-        /* Restore the state at the forking point */
-        state.S[0] = F[0];
-        state.S[1] = F[1];
-        state.S[2] = F[2];
-        state.S[3] = F[3];
-    }
-    if (output_left) {
-        /* Generate the left output block */
-        state.S[0] ^= 0x08040201U; /* Branching constant */
-        state.S[1] ^= 0x82412010U;
-        state.S[2] ^= 0x28140a05U;
-        state.S[3] ^= 0x8844a251U;
-        for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE +
-                      FORKSKINNY_128_256_ROUNDS_AFTER);
-                round < (FORKSKINNY_128_256_ROUNDS_BEFORE +
-                         FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) {
-            forkskinny_128_256_round(&state, round);
-        }
-        le_store_word32(output_left, state.S[0]);
-        le_store_word32(output_left + 4, state.S[1]);
-        le_store_word32(output_left + 8, state.S[2]);
-        le_store_word32(output_left + 12, state.S[3]);
-    } else {
-        /* We only need the right output block */
-        for (round = FORKSKINNY_128_256_ROUNDS_BEFORE;
-                round < (FORKSKINNY_128_256_ROUNDS_BEFORE +
-                         FORKSKINNY_128_256_ROUNDS_AFTER); ++round) {
-            forkskinny_128_256_round(&state, round);
-        }
-        le_store_word32(output_right, state.S[0]);
-        le_store_word32(output_right + 4, state.S[1]);
-        le_store_word32(output_right + 8, state.S[2]);
-        le_store_word32(output_right + 12, state.S[3]);
-    }
 }

-/**
- * \brief Applies one round of ForkSkinny-128-256 in reverse.
- *
- * \param state State to apply the round to.
- * \param round Number of the round to apply.
- */
-static void forkskinny_128_256_inv_round
-    (forkskinny_128_256_state_t *state, unsigned round)
+void forkskinny_128_256_inv_rounds
+    (forkskinny_128_256_state_t *state, unsigned first, unsigned last)
 {
     uint32_t s0, s1, s2, s3, temp;
     uint8_t rc;
@@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round
     s2 = state->S[2];
     s3 = state->S[3];

-    /* Permute TK1 and TK2 for the next round */
-    skinny128_inv_LFSR2(state->TK2[0]);
-    skinny128_inv_LFSR2(state->TK2[1]);
-    skinny128_inv_permute_tk(state->TK1);
-    skinny128_inv_permute_tk(state->TK2);
-
-    /* Inverse mix of the columns */
-    temp = s0;
-    s0 = s1;
-    s1 = s2;
-    s2 = s3;
-    s3 = temp ^ s2;
-    s2 ^= s0;
-    s1 ^= s2;
-
-    /* Shift the cells in the rows left, which moves the cell
-     * values down closer to the LSB. That is, we do a right
-     * rotate on the word to rotate the cells in the word left */
-    s1 = rightRotate8(s1);
-    s2 = rightRotate16(s2);
-    s3 = rightRotate24(s3);
-
-    /* XOR the round constant and the subkey for this round */
-    rc = RC[round];
-    s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000;
-    s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4);
-    s2 ^= 0x02;
-
-    /* Apply the inverse of the S-box to all cells in the state */
-    skinny128_inv_sbox(s0);
-    skinny128_inv_sbox(s1);
-    skinny128_inv_sbox(s2);
-    skinny128_inv_sbox(s3);
+    /* Perform all requested rounds */
+    while (first > last) {
+        /* Permute TK1 and TK2 for the next round */
+        skinny128_inv_LFSR2(state->TK2[0]);
+        skinny128_inv_LFSR2(state->TK2[1]);
+        skinny128_inv_permute_tk(state->TK1);
+        skinny128_inv_permute_tk(state->TK2);
+
+        /* Inverse mix of the columns */
+        temp = s0;
+        s0 = s1;
+        s1 = s2;
+        s2 = s3;
+        s3 = temp ^ s2;
+        s2 ^= s0;
+        s1 ^= s2;
+
+        /* Shift the cells in the rows left, which moves the cell
+         * values down closer to the LSB. That is, we do a right
+         * rotate on the word to rotate the cells in the word left */
+        s1 = rightRotate8(s1);
+        s2 = rightRotate16(s2);
+        s3 = rightRotate24(s3);
+
+        /* XOR the round constant and the subkey for this round */
+        rc = RC[--first];
+        s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000;
+        s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4);
+        s2 ^= 0x02;
+
+        /* Apply the inverse of the S-box to all cells in the state */
+        skinny128_inv_sbox(s0);
+        skinny128_inv_sbox(s1);
+        skinny128_inv_sbox(s2);
+        skinny128_inv_sbox(s3);
+    }

     /* Save the local variables back to the state */
     state->S[0] = s0;
@@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round
     state->S[3] = s3;
 }
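The rewritten forward_tk/reverse_tk helpers below lean on the fact quoted in their comments: the SKINNY-128 tweakey cell permutation has order 16, so a whole block of 16 schedule steps can be skipped and only the LFSR updates applied. A short way to check that fact, as a sketch (it assumes the skinny128_permute_tk() macro from internal-skinnyutil.h and a hypothetical test harness):

    /* Sketch: sixteen applications of the tweakey cell permutation return
     * a TK array to its original order, which is why forward_tk below can
     * replace each full block of 16 rounds with LFSR updates alone. */
    #include <stdint.h>
    #include <string.h>
    #include "internal-skinnyutil.h"

    static int permute_tk_has_order_16(void)
    {
        uint32_t tk[4] = {0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C};
        uint32_t orig[4];
        int i;
        memcpy(orig, tk, sizeof(orig));
        for (i = 0; i < 16; ++i)
            skinny128_permute_tk(tk);
        return memcmp(tk, orig, sizeof(orig)) == 0;  /* expected: 1 */
    }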
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
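The period-16 claim behind this fast-forward can be checked directly: the SKINNY tweakey cell permutation is a single 16-cycle, so sixteen applications give the identity and only the LFSR updates survive, with each cell spending exactly 8 of those 16 rounds in the two LFSR rows. A standalone self-check (the PT table below is taken from the SKINNY specification, not from this patch):

#include <stdio.h>

/* SKINNY TK cell permutation, per the SKINNY specification */
static const unsigned char PT[16] = {
    9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7
};

int main(void)
{
    unsigned char cells[16], next[16];
    unsigned i, r, hits, p;

    /* Check that PT applied 16 times is the identity */
    for (i = 0; i < 16; ++i)
        cells[i] = (unsigned char)i;
    for (r = 0; r < 16; ++r) {
        for (i = 0; i < 16; ++i)
            next[i] = cells[PT[i]];
        for (i = 0; i < 16; ++i)
            cells[i] = next[i];
    }
    for (i = 0; i < 16; ++i) {
        if (cells[i] != i) {
            printf("period is not 16\n");
            return 1;
        }
    }

    /* Count how many of the 16 rounds a cell spends in rows 0-1
     * (cells 0..7), where the LFSR is applied; expect 8 */
    for (hits = 0, p = 0, r = 0; r < 16; ++r) {
        if (p < 8)
            ++hits;
        p = PT[p];
    }
    printf("PT^16 == identity, LFSR rows visited %u times\n", hits);
    return 0;
}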
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB.
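Each row word holds four byte cells with cell 0 at the least significant byte, so rotating the word right moves every cell one position to the left, exactly as the comment above says. A two-line illustration (standalone sketch with a local stand-in for the library's rightRotate8() macro):

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Local stand-in for the rightRotate8() macro used by the patch */
static uint32_t rot_right8(uint32_t x)
{
    return (x >> 8) | (x << 24);
}

int main(void)
{
    uint32_t row = 0x33221100U; /* cells 0..3 are 0x00,0x11,0x22,0x33 */

    /* Prints 00332211: cell contents moved one byte toward the LSB,
     * i.e. the cells rotated left within the row */
    printf("%08" PRIx32 "\n", rot_right8(row));
    return 0;
}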
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
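ForkSkinny-64-192 packs one row of four 4-bit cells into a single uint16_t, loaded big-endian, so cell 0 sits in the most significant nibble (matching the "big-endian nibble order" note in the new header). A standalone sketch, where be_load16() is a hypothetical stand-in for the library's be_load_word16():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the library's be_load_word16() */
static uint16_t be_load16(const unsigned char *p)
{
    return (uint16_t)((p[0] << 8) | p[1]);
}

int main(void)
{
    const unsigned char bytes[2] = {0x01, 0x23};
    uint16_t row = be_load16(bytes);
    unsigned cell;

    /* Prints "0 1 2 3": cell 0 occupies the top nibble of the row */
    for (cell = 0; cell < 4; ++cell)
        printf("%x ", (row >> (12 - 4 * cell)) & 0x0F);
    printf("\n");
    return 0;
}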
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
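As the branch structure here shows, output_left may be NULL when only the right fork is wanted, which skips the fork-point save/restore and the extra left-branch rounds entirely; note the else path stores through output_right, so that pointer must be valid whenever output_left is NULL. A minimal usage sketch (placeholder all-zero inputs, assuming internal-forkskinny.h is on the include path):

#include <string.h>
#include "internal-forkskinny.h"

int main(void)
{
    /* Placeholder all-zero tweakey and plaintext, for illustration only */
    unsigned char key[24] = {0};
    unsigned char pt[8] = {0};
    unsigned char left[8], right[8], right_only[8];

    /* Both forks in one call */
    forkskinny_64_192_encrypt(key, left, right, pt);

    /* Passing NULL for output_left computes just the right fork */
    forkskinny_64_192_encrypt(key, NULL, right_only, pt);

    /* The right fork is identical either way */
    return memcmp(right, right_only, 8) == 0 ? 0 : 1;
}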
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
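The "last round to apply plus 1" wording means first/last form a half-open interval [first, last), which is what lets forkae.c chain segments without applying any round twice. A minimal self-check sketch, assuming internal-forkskinny.h and its implementation are linked into the program:

#include <string.h>
#include "internal-forkskinny.h"

int main(void)
{
    forkskinny_128_256_state_t a, b;

    /* Identical, arbitrary starting states */
    memset(&a, 0x5a, sizeof(a));
    memcpy(&b, &a, sizeof(a));

    /* One call over the half-open range [0, 21)... */
    forkskinny_128_256_rounds(&a, 0, 21);

    /* ...matches two chained segments [0, 10) and [10, 21) */
    forkskinny_128_256_rounds(&b, 0, 10);
    forkskinny_128_256_rounds(&b, 10, 21);

    return memcmp(&a, &b, sizeof(a)) == 0 ? 0 : 1;
}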
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb128t288n104v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
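Putting the two halves together: encryption maps one block to the (left, right) pair, and decryption consumes the left block to reconstruct both the plaintext and the right block, as the function bodies below spell out. A minimal round-trip self-check sketch (placeholder all-zero inputs, header assumed on the include path):

#include <string.h>
#include "internal-forkskinny.h"

int main(void)
{
    unsigned char key[32] = {0};
    unsigned char pt[16] = {0};
    unsigned char left[16], right[16];
    unsigned char rec_pt[16], rec_right[16];

    /* Forward: one plaintext block in, both fork outputs out */
    forkskinny_128_256_encrypt(key, left, right, pt);

    /* Inverse: the left block alone recovers the plaintext and
     * regenerates the right block */
    forkskinny_128_256_decrypt(key, rec_pt, rec_right, left);

    return (memcmp(rec_pt, pt, 16) == 0 &&
            memcmp(rec_right, right, 16) == 0) ? 0 : 1;
}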
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 100 
+ .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + .byte 
22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + .byte 
86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 + 
.byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 
168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + 
.byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + 
.byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + 
.byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + 
lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + 
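+/* For reference, one iteration of the unrolled loop around this point, as
+ * a rough C model (identifiers are illustrative, not from this file):
+ *     for (i = 0; i < 16; ++i) S[i] = table_0[S[i]];     S-box layer
+ *     for (i = 0; i < 8; ++i)  S[i] ^= TK1[i] ^ TK2[i];  two subkey rows
+ *     S[0] ^= table_4[2*r]; S[4] ^= table_4[2*r + 1];    round constant
+ *     S[2] ^= 0x02; S[8] ^= 0x02;                        fixed constants
+ *     shift_rows(S);                                     register rotations
+ *     mix_columns(S);                                    the eor block
+ *     permute_tk(TK1); permute_tk(TK2);                  half-swap + shuffle
+ *     for (i = 0; i < 8; ++i) TK2[i] = table_2[TK2[i]];  TK2 LFSR
+ */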
ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds 
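+/* forkskinny_128_256_inv_rounds undoes the loop above, stepping the round
+ * counter downwards and inverting each operation in reverse order: inverse
+ * tweakey permutation (table_3 for the TK2 LFSR), inverse MixColumns,
+ * inverse ShiftRows, the round constants, and finally the inverse S-box
+ * via table_1.  Roughly, in C:
+ *     while (last > first)
+ *         inv_round(state, --last);
+ */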
+ .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov 
r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 
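+/* forkskinny_128_256_forward_tk fast-forwards the tweakey schedule without
+ * touching the state: each loop iteration permutes TK1 and TK2 and steps
+ * the eight bytes brought into the top rows of TK2 through table_2.  A C
+ * sketch, where PT is the standard SKINNY tweakey permutation:
+ *     static const unsigned char PT[16] =
+ *         {9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7};
+ *     while (rounds-- > 0) {
+ *         permute(TK1, PT);
+ *         permute(TK2, PT);
+ *         for (i = 0; i < 8; ++i) TK2[i] = table_2[TK2[i]];
+ *     }
+ */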
+ ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std 
Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
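+/* forkskinny_128_256_reverse_tk is the mirror image of forward_tk: it rolls
+ * the schedule back by stepping TK2 through the inverse LFSR (table_3) and
+ * applying the inverse permutation.  In the same C sketch as above:
+ *     static const unsigned char PT_INV[16] =
+ *         {8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1};
+ *     while (rounds-- > 0) {
+ *         for (i = 0; i < 8; ++i) TK2[i] = table_3[TK2[i]];
+ *         permute(TK1, PT_INV);
+ *         permute(TK2, PT_INV);
+ *     }
+ */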
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi 
r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) 
+ elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std 
Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld 
r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + 
eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + 
mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd 
r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + 
push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std 
Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + 
pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif 
defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z 
+#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap 
r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd 
r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + 
std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + 
andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
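The one-round static helpers are being replaced here by public forkskinny_*_rounds(state, first, last) functions that apply the half-open round range [first, last) and advance the tweakey schedule as they go. A minimal sketch of how a caller composes the fork out of these (hypothetical helper; ROUNDS_BEFORE/AFTER restate the 21/27 values defined in forkae.c, and the branching constant is the one used in the code below):

#include <stdint.h>
#include <string.h>
#include "internal-forkskinny.h"

#define ROUNDS_BEFORE 21  /* FORKSKINNY_128_256_ROUNDS_BEFORE */
#define ROUNDS_AFTER  27  /* FORKSKINNY_128_256_ROUNDS_AFTER */

static void sketch_fork_both(forkskinny_128_256_state_t *state,
                             uint32_t right[4], uint32_t left[4])
{
    uint32_t F[4];

    /* Common prefix: rounds 0..20 */
    forkskinny_128_256_rounds(state, 0, ROUNDS_BEFORE);
    memcpy(F, state->S, sizeof(F));

    /* Right leg: rounds 21..47; the tweakey keeps advancing */
    forkskinny_128_256_rounds(state, ROUNDS_BEFORE,
                              ROUNDS_BEFORE + ROUNDS_AFTER);
    memcpy(right, state->S, sizeof(F));

    /* Left leg: restore the fork state, add the branching constant,
     * then run rounds 48..74 with the already-advanced tweakey */
    memcpy(state->S, F, sizeof(F));
    state->S[0] ^= 0x08040201U;
    state->S[1] ^= 0x82412010U;
    state->S[2] ^= 0x28140a05U;
    state->S[3] ^= 0x8844a251U;
    forkskinny_128_256_rounds(state, ROUNDS_BEFORE + ROUNDS_AFTER,
                              ROUNDS_BEFORE + ROUNDS_AFTER * 2);
    memcpy(left, state->S, sizeof(F));
}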
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
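The inverse rounds undo each forward step in the opposite order: un-permute the tweakey first, invert MixColumns, invert ShiftRows, re-add the round key using RC[--first], then apply the inverse S-box. The MixColumns pair in particular is just two XOR networks that cancel; a self-contained check (hypothetical main, arbitrary test words, the two networks copied from the round functions):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t s0 = 0x01234567U, s1 = 0x89abcdefU;
    uint32_t s2 = 0x0f1e2d3cU, s3 = 0x4b5a6978U, temp;
    uint32_t a = s0, b = s1, c = s2, d = s3;

    /* Forward mix, exactly as in forkskinny_128_256_rounds() */
    s1 ^= s2;
    s2 ^= s0;
    temp = s3 ^ s2;
    s3 = s2;
    s2 = s1;
    s1 = s0;
    s0 = temp;

    /* Inverse mix, exactly as in forkskinny_128_256_inv_rounds() */
    temp = s0;
    s0 = s1;
    s1 = s2;
    s2 = s3;
    s3 = temp ^ s2;
    s2 ^= s0;
    s1 ^= s2;

    /* The composition must be the identity */
    assert(s0 == a && s1 == b && s2 == c && s3 == d);
    return 0;
}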
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
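Decryption needs the schedule at round FORKSKINNY_128_256_ROUNDS_BEFORE + 2 * FORKSKINNY_128_256_ROUNDS_AFTER = 21 + 54 = 75. forward_tk() gets there as four blocks of 16 rounds, each costing only 8 LFSR applications per TK2 word because the cell permutation has order 16 and drops out, plus 11 ordinary rounds at the end. A sketch that cross-checks the shortcut against the naive per-round loop the old decrypt code used (hypothetical harness; assumes the skinny128_* macros from internal-skinnyutil.h are in scope):

#include <assert.h>
#include <string.h>
#include "internal-forkskinny.h"
#include "internal-skinnyutil.h"

/* The straightforward schedule walk from the removed decrypt code */
static void naive_forward_tk
    (forkskinny_128_256_state_t *state, unsigned rounds)
{
    while (rounds-- > 0) {
        skinny128_permute_tk(state->TK1);
        skinny128_permute_tk(state->TK2);
        skinny128_LFSR2(state->TK2[0]);
        skinny128_LFSR2(state->TK2[1]);
    }
}

/* Both paths must agree after the full 75-round fast-forward */
static void check_fast_forward(const forkskinny_128_256_state_t *start)
{
    forkskinny_128_256_state_t a = *start, b = *start;
    forkskinny_128_256_forward_tk(&a, 75);  /* 4 * 16 + 11 */
    naive_forward_tk(&b, 75);
    assert(memcmp(a.TK1, b.TK1, sizeof(a.TK1)) == 0);
    assert(memcmp(a.TK2, b.TK2, sizeof(a.TK2)) == 0);
}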
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
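For reference, each TK2 cell steps through an 8-bit LFSR once per round while it sits in the top two rows, and TK3 uses the opposite-direction LFSR (the skinny128_LFSR2()/skinny128_LFSR3() calls below). A word-sliced sketch of the TK2 step and its inverse; the bodies here follow the SKINNY specification and are an assumption about, not a copy of, the macros in internal-skinnyutil.h:

#include <assert.h>
#include <stdint.h>

/* One TK2 LFSR step on four packed 8-bit cells:
 * x7..x0 -> x6..x0 || (x7 ^ x5), per cell */
static uint32_t lfsr2(uint32_t x)
{
    return ((x << 1) & 0xFEFEFEFEU) ^ (((x >> 7) ^ (x >> 5)) & 0x01010101U);
}

/* Inverse step: shift right and reconstruct the dropped top bit */
static uint32_t inv_lfsr2(uint32_t x)
{
    return ((x >> 1) & 0x7F7F7F7FU) ^ (((x << 7) ^ (x << 1)) & 0x80808080U);
}

int main(void)
{
    uint32_t w = 0xDEADBEEFU;
    assert(inv_lfsr2(lfsr2(w)) == w);   /* the two steps must cancel */
    return 0;
}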
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
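Rows of the 128-bit state are loaded little-endian, so cell 0 of a row sits in the low byte; rotating the row word left by 8 bits therefore moves every cell one column to the right, which is why the forward rounds use leftRotate8/16/24 and these inverse rounds use rightRotate8/16/24. A small sketch with the rotates spelled out (internal-util.h provides the real macros):

#include <assert.h>
#include <stdint.h>

#define leftRotate8(x)  (((x) << 8) | ((x) >> 24))
#define rightRotate8(x) (((x) >> 8) | ((x) << 24))

int main(void)
{
    /* Row holding cells (c0,c1,c2,c3) = (0x00,0x11,0x22,0x33),
     * packed little-endian: c0 in the low byte */
    uint32_t row = 0x33221100U;

    /* Rotating the word left by one byte rotates the cells right
     * by one column: (c0,c1,c2,c3) -> (c3,c0,c1,c2) */
    assert(leftRotate8(row) == 0x22110033U);

    /* The inverse round rotates straight back */
    assert(rightRotate8(leftRotate8(row)) == row);
    return 0;
}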
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
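The period-16 claim in these comments can be checked directly from the inverse cell permutation PT' listed in the internal-skinnyutil.h hunk later in this patch: it is a single 16-cycle, so 16 whole rounds return every tweakey cell to its starting slot, TK1 needs no work at all, and TK2/TK3 need only their LFSR steps. A throwaway verification sketch:

#include <assert.h>

int main(void)
{
    /* PT' from the skinny128_inv_permute_tk() comment in this patch;
     * a permutation and its inverse have the same order */
    static const int PT_INV[16] = {
        8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1
    };
    int cell, step, order = 0;

    for (step = 1; step <= 16; ++step) {
        int identity = 1;
        for (cell = 0; cell < 16; ++cell) {
            int p = cell, s;
            for (s = 0; s < step; ++s)
                p = PT_INV[p];          /* apply PT' "step" times */
            if (p != cell)
                identity = 0;
        }
        if (identity) {
            order = step;
            break;
        }
    }
    assert(order == 16);   /* 16 rounds of permutation are a no-op */
    return 0;
}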
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
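ForkSkinny-64-192 packs four 4-bit cells into each 16-bit row in big-endian nibble order, cell 0 in the top nibble (hence the be_load_word16() loads), so moving a row's cells one column right is a plain 4-bit right rotate of the word, as used by forkskinny_64_192_rounds() below. A sketch with the rotate spelled out (the real rightRotate4_16() lives in internal-util.h):

#include <assert.h>
#include <stdint.h>

#define rightRotate4_16(x) ((uint16_t)(((x) >> 4) | ((x) << 12)))

int main(void)
{
    /* Row holding cells (c0,c1,c2,c3) = (0x0,0x1,0x2,0x3),
     * big-endian nibble order: c0 in the top nibble */
    uint16_t row = 0x0123;

    /* Cells rotate right by one column: (c0,c1,c2,c3) -> (c3,c0,c1,c2) */
    assert(rightRotate4_16(row) == 0x3012);
    return 0;
}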
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
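The branching constant is applied purely by XOR, so the identical four XORs both install it when encryption enters the left leg and strip it when decryption arrives back at the forking point. A sketch of that involution for the 64-bit constants above (hypothetical helper name):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* XOR the ForkSkinny-64-192 branching constant into the state */
static void xor_branch_constant_64(uint16_t S[4])
{
    S[0] ^= 0x1249U;
    S[1] ^= 0x36daU;
    S[2] ^= 0x5b7fU;
    S[3] ^= 0xec81U;
}

int main(void)
{
    uint16_t S[4] = { 1, 2, 3, 4 }, saved[4];
    memcpy(saved, S, sizeof(S));
    xor_branch_constant_64(S);   /* encrypt: enter the left leg */
    xor_branch_constant_64(S);   /* decrypt: back at the fork */
    assert(memcmp(S, saved, sizeof(S)) == 0);
    return 0;
}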
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
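Put concretely: forkskinny_128_256_rounds(s, a, b) applies rounds a..b-1, and forkskinny_128_256_inv_rounds(s, b, a) undoes exactly those, rewinding the tweakey words as it goes. A minimal round-trip sketch (hypothetical check function; the count 21 is FORKSKINNY_128_256_ROUNDS_BEFORE from forkae.c):

#include <assert.h>
#include <string.h>
#include "internal-forkskinny.h"

/* rounds(a, b) followed by inv_rounds(b, a) must restore both
 * the block state and the tweakey schedule */
static void check_round_trip(const forkskinny_128_256_state_t *start)
{
    forkskinny_128_256_state_t st = *start;
    forkskinny_128_256_rounds(&st, 0, 21);      /* apply rounds 0..20 */
    forkskinny_128_256_inv_rounds(&st, 21, 0);  /* undo them */
    assert(memcmp(&st, start, sizeof(st)) == 0);
}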
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
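output_left and output_right are optional in the encrypt entry points: passing NULL for one of them skips that leg of the fork entirely, which is the common case in the PAEF/SAEF modes. A hypothetical call sketch:

#include <stddef.h>
#include "internal-forkskinny.h"

void sketch_usage(void)
{
    static const unsigned char key[32] = { 0 };   /* tweakey: TK1 || TK2 */
    static const unsigned char block[16] = { 0 };
    unsigned char left[16], right[16];

    /* Both legs of the fork */
    forkskinny_128_256_encrypt(key, left, right, block);

    /* Right leg only: the left branch is never computed */
    forkskinny_128_256_encrypt(key, NULL, right, block);
}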
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/paefforkskinnyb64t192n48v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
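The new skinny128_permute_tk_half()/skinny128_inv_permute_tk_half() macros above apply one half of the tweakey permutation in place, without the usual half swap, for implementations that process the two halves on alternate rounds. As their comments suggest, the inverse variant undoes the forward one on the same pair of words, which a throwaway check can confirm (assumes internal-skinnyutil.h is included):

#include <assert.h>
#include <stdint.h>
#include "internal-skinnyutil.h"

int main(void)
{
    uint32_t lo0 = 0x03020100U, lo1 = 0x07060504U;
    uint32_t a = lo0, b = lo1;

    /* Forward half-permute, then the inverse half-permute */
    skinny128_permute_tk_half(a, b);
    skinny128_inv_permute_tk_half(a, b);
    assert(a == lo0 && b == lo1);   /* round trip is the identity */
    return 0;
}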
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + .byte 
100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 223 + 
.byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 84 + 
.byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + .byte 105 
+ .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 163 + .byte 
168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + .byte 148 + 
.byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 + .byte 92 + 
.byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + .byte 99 + 
.byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + 
lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + 
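/* rows 2-3 of TK2 (Y+25..Y+32) are gathered here, pushed through the TK2 LFSR in table_2, and written back permuted into rows 0-1 */ +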
ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global forkskinny_128_256_inv_rounds 
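+/* Inverse of forkskinny_128_256_rounds: walks the selected rounds in + * reverse, undoing the state via the inverse S-box (table_1) and unwinding + * the tweakey schedule via table_3, the inverse of the TK2 LFSR in table_2. */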
+ .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov 
r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld 
r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 
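+/* TK1 stays in registers for the whole loop; TK2 (bytes 16-31 of the + * tweakey) is staged in the stack frame at Y+1..Y+16 below */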
+ ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std 
Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
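+/* NOTE: reverse_tk undoes one key-schedule step per iteration: the
+ * half-tweakey that was updated last is pushed through the inverse
+ * cell LFSR (the table_3 lookups above) and then through the inverse
+ * of the SKINNY byte permutation PT.  Since SKINNY's LFSR3 is the
+ * inverse of LFSR2, the same two 256-byte tables appear to serve both
+ * directions with their roles swapped: forward_tk reads table_2 where
+ * this routine reads table_3.  A minimal C sketch of one reverse step
+ * (names are illustrative, not part of this implementation):
+ *
+ *     #include <stdint.h>
+ *     static const uint8_t PT_INV[16] =
+ *         {8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1};
+ *     static void rev_tk_step(uint8_t tk[16], const uint8_t inv_lfsr[256])
+ *     {
+ *         uint8_t t[16];
+ *         int i;
+ *         for (i = 0; i < 8; ++i)      /* undo the LFSR on the top half */
+ *             tk[i] = inv_lfsr[tk[i]];
+ *         for (i = 0; i < 16; ++i)     /* undo the byte permutation */
+ *             t[i] = tk[PT_INV[i]];
+ *         for (i = 0; i < 16; ++i)
+ *             tk[i] = t[i];
+ *     }
+ */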
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi 
r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) 
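+/* NOTE: the round loops keep a *doubled* round counter: r22 is shifted
+ * left once before entering the loop (lsl r22) and bumped twice per
+ * round, and the stop value cached at Y+49 is doubled the same way,
+ * because table_4 packs two constant bytes per round (the split
+ * ForkSkinny round constant).  A compile-only C sketch of one round of
+ * this loop body; the helper names are hypothetical stand-ins, not
+ * functions of this implementation:
+ *
+ *     #include <stdint.h>
+ *     extern const uint8_t table_0[256];   /* forward S-box           */
+ *     extern const uint8_t table_4[];      /* 2 constant bytes/round  */
+ *     extern void sub_cells(uint8_t S[16], const uint8_t t[256]);
+ *     extern void add_tk_rows01(uint8_t S[16]);  /* TK1^TK2^TK3       */
+ *     extern void shift_rows(uint8_t S[16]);
+ *     extern void mix_columns(uint8_t S[16]);
+ *     extern void schedule_tk(void);  /* PT, then LFSRs via table_2/3 */
+ *
+ *     static void round_128_384(uint8_t S[16], unsigned r)
+ *     {
+ *         sub_cells(S, table_0);
+ *         add_tk_rows01(S);
+ *         S[0] ^= table_4[2 * r];          /* low part of the rc  */
+ *         S[4] ^= table_4[2 * r + 1];      /* high part of the rc */
+ *         /* ...plus the fixed 0x02 cell constants (ldi r18,2)   */
+ *         shift_rows(S);
+ *         mix_columns(S);
+ *         schedule_tk();
+ *     }
+ */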
+ elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std 
Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld 
r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + 
eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + 
mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd 
r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + 
std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function +forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + 
push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std 
Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + 
pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif 
defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z 
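+/* NOTE: ForkSkinny-64-192 packs two 4-bit cells into each byte, so
+ * the 256-entry tables used here (table_5/table_6 appear to be the
+ * 4-bit S-box and its inverse expanded to byte pairs, table_7/table_8
+ * the tweakey-cell LFSRs) update both nibbles with a single lookup,
+ * and the tweakey permutation is done with mov/swap/andi/or shuffles
+ * instead of plain byte moves.  The "swap" after each table_4 read and
+ * the 0x20 constant (ldi r18,32) place values in the high nibble of a
+ * packed byte.  A minimal C sketch of the recurring shuffle
+ * (illustrative names, not from this file):
+ *
+ *     #include <stdint.h>
+ *     /* mov/swap/andi 0xF0 + andi 0x0F/or: build one permuted byte
+ *        from the nibbles of two packed cell pairs */
+ *     static uint8_t pick_nibbles(uint8_t hi_src, uint8_t lo_src,
+ *                                 int swap_hi)
+ *     {
+ *         uint8_t hi = swap_hi ? (uint8_t)(hi_src << 4)
+ *                              : (uint8_t)(hi_src & 0xF0);
+ *         return (uint8_t)(hi | (lo_src & 0x0F));
+ *     }
+ */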
+#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap 
r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd 
r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + 
std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + 
andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
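The batched interface introduced here replaces the old one-round-at-a-time calls: a single call walks the round counter over a half-open interval. A minimal usage sketch, assuming internal-forkskinny.h from this patch is on the include path and using the 21/27 round split that forkae.c defines for ForkSkinny-128-256 (the main() harness and the zeroed state are illustrative only):

    #include <string.h>
    #include "internal-forkskinny.h"

    int main(void)
    {
        forkskinny_128_256_state_t st;
        memset(&st, 0, sizeof st);                   /* illustrative state only */
        forkskinny_128_256_rounds(&st, 0, 21);       /* common prefix [0, 21) */
        forkskinny_128_256_rounds(&st, 21, 21 + 27); /* one branch [21, 48) */
        return 0;
    }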
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
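The seven XOR/move statements in the forward round body are an in-place form of the SKINNY MixColumns matrix, and the inverse sequence below undoes them in the opposite order. A self-contained sketch that checks the in-place form against the explicit row equations (the test values are arbitrary; the identity holds for any input):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t s0 = 0x11111111U, s1 = 0x22222222U,
                 s2 = 0x44444444U, s3 = 0x88888888U, temp;
        /* Explicit MixColumns row equations for comparison */
        uint32_t m0 = s0 ^ s2 ^ s3, m1 = s0, m2 = s1 ^ s2, m3 = s0 ^ s2;

        /* The in-place sequence from forkskinny_128_256_rounds() */
        s1 ^= s2;
        s2 ^= s0;
        temp = s3 ^ s2;
        s3 = s2;
        s2 = s1;
        s1 = s0;
        s0 = temp;

        assert(s0 == m0 && s1 == m1 && s2 == m2 && s3 == m3);
        return 0;
    }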
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
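The decryption flow above starts from the far end of the key schedule because the block being decrypted is the left-branch output, which is produced after all "before" plus twice the "after" rounds. A small sketch of the round-count bookkeeping, using the 21/27 split that forkae.c defines for this variant (the harness is illustrative):

    #include <assert.h>

    int main(void)
    {
        unsigned before = 21, after = 27;

        assert(before + 2 * after == 75); /* schedule position of the input
                                             (left-branch) ciphertext */
        assert(before + after == 48);     /* schedule position of the right
                                             branch ciphertext */
        return 0;
    }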
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
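The 16-round shortcut in the forward_tk/reverse_tk functions relies on the tweakey cell permutation having order 16, with every cell spending exactly 8 of those 16 rounds in the two rows that the LFSR touches. A standalone check, using the PT permutation reconstructed from the inverse table PT' quoted in internal-skinnyutil.h (the simulation harness is illustrative):

    #include <assert.h>
    #include <string.h>

    int main(void)
    {
        /* Cell permutation PT (TK'[i] = TK[PT[i]]) */
        static const int PT[16] = {9, 15, 8, 13, 10, 14, 12, 11,
                                   0, 1, 2, 3, 4, 5, 6, 7};
        int cells[16], next[16], clocked[16] = {0};
        int i, round;

        for (i = 0; i < 16; ++i)
            cells[i] = i;
        for (round = 0; round < 16; ++round) {
            for (i = 0; i < 16; ++i)
                next[i] = cells[PT[i]];
            memcpy(cells, next, sizeof cells);
            for (i = 0; i < 8; ++i)
                ++clocked[cells[i]]; /* the LFSR only touches rows 0 and 1 */
        }
        for (i = 0; i < 16; ++i) {
            assert(cells[i] == i);   /* the permutation has order 16 */
            assert(clocked[i] == 8); /* every cell is clocked exactly 8 times */
        }
        return 0;
    }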
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
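Each rc = RC[first] byte is split into two pieces: the low nibble is injected into row 0 and the high bits into row 1. A sketch of the 7-bit LFSR that is assumed to generate the 87-entry RC[] table at the top of this file (the generator, its all-zero starting state, and the printing harness are illustrative, not taken from this patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Assumed round-constant generator: a 7-bit LFSR with feedback
         * bit ~(rc6 ^ rc5), clocked once before each round; the round
         * function uses rc & 0x0F for row 0 and rc >> 4 for row 1 */
        uint8_t rc = 0;
        int i;
        for (i = 0; i < 87; ++i) {
            rc = (uint8_t)(((rc << 1) | (((rc >> 6) ^ (rc >> 5) ^ 1) & 1)) & 0x7F);
            printf("0x%02x%s", rc, (i % 8 == 7) ? ",\n" : ", ");
        }
        printf("\n");
        return 0;
    }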
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
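Decryption strips the branching constant with the same XORs that encryption applied, since XOR with a fixed mask is its own inverse. A trivial self-contained check with the 128-bit constant used in these functions (the test state is arbitrary):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        static const uint32_t BC[4] =
            {0x08040201U, 0x82412010U, 0x28140a05U, 0x8844a251U};
        uint32_t S[4] = {0xdeadbeefU, 0x01234567U, 0x89abcdefU, 0x42424242U};
        uint32_t saved[4] = {S[0], S[1], S[2], S[3]};
        int i;

        for (i = 0; i < 4; ++i)
            S[i] ^= BC[i]; /* applied on the left branch of encryption */
        for (i = 0; i < 4; ++i)
            S[i] ^= BC[i]; /* removed again during decryption */
        for (i = 0; i < 4; ++i)
            assert(S[i] == saved[i]);
        return 0;
    }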
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
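For the byte-oriented state, the TK2 and TK3 cell LFSRs from the SKINNY specification are mutual inverses, which is what makes the inverse tweakey walk in reverse_tk cheap. A bytewise sketch under that assumption (the helper names are illustrative; the packed skinny128_LFSR2/skinny128_LFSR3 macros in internal-skinnyutil.h apply the same update to four byte cells inside one uint32_t):

    #include <assert.h>
    #include <stdint.h>

    static uint8_t lfsr2(uint8_t x) /* (x7..x0) -> (x6..x0, x7^x5) */
    {
        return (uint8_t)(((x << 1) & 0xFE) | (((x >> 7) ^ (x >> 5)) & 1));
    }
    static uint8_t lfsr3(uint8_t x) /* (x7..x0) -> (x0^x6, x7..x1) */
    {
        return (uint8_t)((x >> 1) | (((x << 7) ^ (x << 1)) & 0x80));
    }

    int main(void)
    {
        int x;
        for (x = 0; x < 256; ++x) {
            assert(lfsr3(lfsr2((uint8_t)x)) == (uint8_t)x);
            assert(lfsr2(lfsr3((uint8_t)x)) == (uint8_t)x);
        }
        return 0;
    }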
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
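ForkSkinny-64-192 packs four 4-bit cells into each 16-bit row in big-endian nibble order, which is why the rows are loaded with be_load_word16() rather than a little-endian load. A minimal illustration (load_row is an illustrative stand-in for be_load_word16(), which is assumed to live in internal-util.h):

    #include <assert.h>
    #include <stdint.h>

    static uint16_t load_row(const unsigned char *p)
    {
        return (uint16_t)((p[0] << 8) | p[1]); /* big-endian 16-bit load */
    }

    int main(void)
    {
        const unsigned char bytes[2] = {0x12, 0x34}; /* cells 1, 2, 3, 4 */
        uint16_t row = load_row(bytes);

        assert(((row >> 12) & 0xF) == 0x1); /* first cell = most significant */
        assert((row & 0xF) == 0x4);         /* last cell = least significant */
        return 0;
    }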
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
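With 4-bit cells, shifting a row one cell to the right is a 4-bit rotation of the 16-bit word. A sketch of the rightRotate4_16() operation used by these rounds (the project's real macro is assumed to be equivalent; the harness is illustrative):

    #include <assert.h>
    #include <stdint.h>

    static uint16_t ror4_16(uint16_t x)
    {
        return (uint16_t)((uint16_t)(x >> 4) | (uint16_t)(x << 12));
    }

    int main(void)
    {
        /* Each 4-bit cell moves one position to the right */
        assert(ror4_16(0x1234) == 0x4123);
        return 0;
    }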
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
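The forward call takes a half-open interval [first, last), while the inverse call takes the same two numbers in the opposite order and steps downwards; a forward/inverse pair over one interval therefore restores the whole state, tweakey included. A hypothetical round-trip check (the main() harness, the 0xA5 test pattern, and the literal 21 are illustrative):

    #include <assert.h>
    #include <string.h>
    #include "internal-forkskinny.h"

    int main(void)
    {
        forkskinny_128_256_state_t st, orig;

        memset(&st, 0xA5, sizeof st); /* arbitrary block and tweakey pattern */
        orig = st;

        /* 21 forward rounds, then the same interval inverted: the block
         * and both tweakey halves must come back unchanged */
        forkskinny_128_256_rounds(&st, 0, 21);
        forkskinny_128_256_inv_rounds(&st, 21, 0);

        assert(memcmp(&st, &orig, sizeof st) == 0);
        return 0;
    }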
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
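A minimal call sketch for the single-call forking encryption declared here (not a known-answer test; the all-zero key and plaintext are arbitrary, and per the implementation in forkae.c, output_left may be passed as NULL when only the right block is needed):

    #include <stdio.h>
    #include "internal-forkskinny.h"

    int main(void)
    {
        unsigned char key[32] = {0}, input[16] = {0};
        unsigned char left[16], right[16];

        /* One call produces both branch outputs C0 and C1 */
        forkskinny_128_256_encrypt(key, left, right, input);
        printf("C0[0]=%02x C1[0]=%02x\n", left[0], right[0]);
        return 0;
    }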
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t192n56v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/forkae.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/forkae.c index 4a9671a..49e7610 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/forkae.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/forkae.c @@ -22,7 +22,6 @@ #include "forkae.h" #include "internal-forkskinny.h" -#include "internal-util.h" #include aead_cipher_t const forkae_paef_64_192_cipher = { @@ -138,3 +137,476 @@ aead_cipher_t const forkae_saef_128_256_cipher = { #define FORKAE_TWEAKEY_REDUCED_SIZE 32 #define FORKAE_BLOCK_FUNC forkskinny_128_256 #include "internal-forkae-saef.h" + +/* Helper functions to implement the forking encrypt/decrypt block operations + * on top of the basic "perform N rounds" functions in internal-forkskinny.c */ + +/** + * \brief Number of rounds of ForkSkinny-128-256 before forking. + */ +#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 + +/** + * \brief Number of rounds of ForkSkinny-128-256 after forking. 
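The new skinny128_permute_tk_half and skinny128_inv_permute_tk_half macros compute only the half of the tweakey state that actually changes: PT maps the old bottom half onto the new top half and simply drops the old top half into the bottom, so code unrolled two rounds at a time can alternate between halves and skip the swap entirely. A sketch of how the half macro composes into the full permutation, assuming tk[] holds the four row words (this composition is inferred from the macro comments, not spelled out in the patch):

    /* Full PT step expressed via the half macro: permute the bottom half
     * in place, then swap halves.  tk[0..3] are the four tweakey rows. */
    uint32_t t0 = tk[0], t1 = tk[1];
    skinny128_permute_tk_half(tk[2], tk[3]);  /* bottom half, permuted   */
    tk[0] = tk[2]; tk[1] = tk[3];             /* becomes the new top half */
    tk[2] = t0;    tk[3] = t1;                /* old top half drops down  */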
+ */ +#define FORKSKINNY_128_256_ROUNDS_AFTER 27 + +void forkskinny_128_256_encrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_256_rounds(&state, 0, FORKSKINNY_128_256_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_256_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_256_decrypt + (const unsigned char key[32], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_256_state_t state; + forkskinny_128_256_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_256_forward_tk + (&state, 
FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER * 2, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_256_reverse_tk(&state, FORKSKINNY_128_256_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_256_inv_rounds + (&state, FORKSKINNY_128_256_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_256_rounds + (&fstate, FORKSKINNY_128_256_ROUNDS_BEFORE, + FORKSKINNY_128_256_ROUNDS_BEFORE + + FORKSKINNY_128_256_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-128-384 before forking. + */ +#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 + +/** + * \brief Number of rounds of ForkSkinny-128-384 after forking. + */ +#define FORKSKINNY_128_384_ROUNDS_AFTER 31 + +void forkskinny_128_384_encrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Run all of the rounds before the forking point */ + forkskinny_128_384_rounds(&state, 0, FORKSKINNY_128_384_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint32_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + 
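Note the asymmetry in the 128-256 decryption path above: the single input block is the left-fork ciphertext. The code fast-forwards the tweakey schedule to the end, inverts the left arm back to the forking point, strips the branching constant, and then produces the plaintext by inverting the "before" rounds while re-running the right arm forward from the saved fork state. A usage sketch, with illustrative buffer names:

    /* Recovering both halves from the left-fork ciphertext block; the
     * PAEF/SAEF modes use the reconstructed right block during tag
     * verification. */
    unsigned char tweakey[32];   /* key and tweak, packed by the AE mode */
    unsigned char m[16];         /* receives the plaintext               */
    unsigned char right[16];     /* receives the right-fork block        */
    forkskinny_128_256_decrypt(tweakey, m, right, c_left);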
state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x08040201U; /* Branching constant */ + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_128_384_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, state.S[0]); + le_store_word32(output_right + 4, state.S[1]); + le_store_word32(output_right + 8, state.S[2]); + le_store_word32(output_right + 12, state.S[3]); + } +} + +void forkskinny_128_384_decrypt + (const unsigned char key[48], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_128_384_state_t state; + forkskinny_128_384_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = le_load_word32(key); + state.TK1[1] = le_load_word32(key + 4); + state.TK1[2] = le_load_word32(key + 8); + state.TK1[3] = le_load_word32(key + 12); + state.TK2[0] = le_load_word32(key + 16); + state.TK2[1] = le_load_word32(key + 20); + state.TK2[2] = le_load_word32(key + 24); + state.TK2[3] = le_load_word32(key + 28); + state.TK3[0] = le_load_word32(key + 32); + state.TK3[1] = le_load_word32(key + 36); + state.TK3[2] = le_load_word32(key + 40); + state.TK3[3] = le_load_word32(key + 44); + state.S[0] = le_load_word32(input); + state.S[1] = le_load_word32(input + 4); + state.S[2] = le_load_word32(input + 8); + state.S[3] = le_load_word32(input + 12); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_128_384_forward_tk + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_128_384_inv_rounds + (&state, FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER * 2, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x08040201U; + state.S[1] ^= 0x82412010U; + state.S[2] ^= 0x28140a05U; + state.S[3] ^= 0x8844a251U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_128_384_reverse_tk(&state, FORKSKINNY_128_384_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_128_384_inv_rounds(&state, FORKSKINNY_128_384_ROUNDS_BEFORE, 0); + le_store_word32(output_left, state.S[0]); + le_store_word32(output_left + 4, state.S[1]); + le_store_word32(output_left + 8, state.S[2]); + le_store_word32(output_left + 12, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_128_384_rounds + (&fstate, FORKSKINNY_128_384_ROUNDS_BEFORE, + FORKSKINNY_128_384_ROUNDS_BEFORE + + FORKSKINNY_128_384_ROUNDS_AFTER); + le_store_word32(output_right, fstate.S[0]); + le_store_word32(output_right + 4, fstate.S[1]); + le_store_word32(output_right + 8, fstate.S[2]); + le_store_word32(output_right + 
12, fstate.S[3]); +} + +/** + * \brief Number of rounds of ForkSkinny-64-192 before forking. + */ +#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 + +/** + * \brief Number of rounds of ForkSkinny-64-192 after forking. + */ +#define FORKSKINNY_64_192_ROUNDS_AFTER 23 + +void forkskinny_64_192_encrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Run all of the rounds before the forking point */ + forkskinny_64_192_rounds(&state, 0, FORKSKINNY_64_192_ROUNDS_BEFORE); + + /* Determine which output blocks we need */ + if (output_left && output_right) { + /* We need both outputs so save the state at the forking point */ + uint16_t F[4]; + F[0] = state.S[0]; + F[1] = state.S[1]; + F[2] = state.S[2]; + F[3] = state.S[3]; + + /* Generate the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + + /* Restore the state at the forking point */ + state.S[0] = F[0]; + state.S[1] = F[1]; + state.S[2] = F[2]; + state.S[3] = F[3]; + } + if (output_left) { + /* Generate the left output block */ + state.S[0] ^= 0x1249U; /* Branching constant */ + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + } else { + /* We only need the right output block */ + forkskinny_64_192_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, state.S[0]); + be_store_word16(output_right + 2, state.S[1]); + be_store_word16(output_right + 4, state.S[2]); + be_store_word16(output_right + 6, state.S[3]); + } +} + +void forkskinny_64_192_decrypt + (const unsigned char key[24], unsigned char *output_left, + unsigned char *output_right, const unsigned char *input) +{ + forkskinny_64_192_state_t state; + forkskinny_64_192_state_t fstate; + + /* Unpack the tweakey and the input */ + state.TK1[0] = be_load_word16(key); + state.TK1[1] = be_load_word16(key + 2); + state.TK1[2] = be_load_word16(key + 4); + state.TK1[3] = be_load_word16(key + 6); + state.TK2[0] = be_load_word16(key + 8); + state.TK2[1] = be_load_word16(key + 10); + state.TK2[2] = 
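The two block sizes use different packing: the 128-bit variants load each row as a little-endian 32-bit word, while ForkSkinny-64-192 loads big-endian 16-bit rows so the four 4-bit cells of a row stay in big-endian nibble order, as the header comment earlier in this patch notes. The loaders come from internal-util.h; their semantics are sketched here for reference:

    /* Semantics of the load helpers used above (the real definitions
     * live in internal-util.h). */
    #define be_load_word16(p) \
        ((uint16_t)((((uint16_t)((p)[0])) << 8) | ((uint16_t)((p)[1]))))
    #define le_load_word32(p) \
        ((((uint32_t)((p)[3])) << 24) | (((uint32_t)((p)[2])) << 16) | \
         (((uint32_t)((p)[1])) << 8)  |  ((uint32_t)((p)[0])))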
be_load_word16(key + 12); + state.TK2[3] = be_load_word16(key + 14); + state.TK3[0] = be_load_word16(key + 16); + state.TK3[1] = be_load_word16(key + 18); + state.TK3[2] = be_load_word16(key + 20); + state.TK3[3] = be_load_word16(key + 22); + state.S[0] = be_load_word16(input); + state.S[1] = be_load_word16(input + 2); + state.S[2] = be_load_word16(input + 4); + state.S[3] = be_load_word16(input + 6); + + /* Fast-forward the tweakey to the end of the key schedule */ + forkskinny_64_192_forward_tk + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2); + + /* Perform the "after" rounds on the input to get back + * to the forking point in the cipher */ + forkskinny_64_192_inv_rounds + (&state, FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER * 2, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Remove the branching constant */ + state.S[0] ^= 0x1249U; + state.S[1] ^= 0x36daU; + state.S[2] ^= 0x5b7fU; + state.S[3] ^= 0xec81U; + + /* Roll the tweakey back another "after" rounds */ + forkskinny_64_192_reverse_tk(&state, FORKSKINNY_64_192_ROUNDS_AFTER); + + /* Save the state and the tweakey at the forking point */ + fstate = state; + + /* Generate the left output block after another "before" rounds */ + forkskinny_64_192_inv_rounds(&state, FORKSKINNY_64_192_ROUNDS_BEFORE, 0); + be_store_word16(output_left, state.S[0]); + be_store_word16(output_left + 2, state.S[1]); + be_store_word16(output_left + 4, state.S[2]); + be_store_word16(output_left + 6, state.S[3]); + + /* Generate the right output block by going forward "after" + * rounds from the forking point */ + forkskinny_64_192_rounds + (&fstate, FORKSKINNY_64_192_ROUNDS_BEFORE, + FORKSKINNY_64_192_ROUNDS_BEFORE + + FORKSKINNY_64_192_ROUNDS_AFTER); + be_store_word16(output_right, fstate.S[0]); + be_store_word16(output_right + 2, fstate.S[1]); + be_store_word16(output_right + 4, fstate.S[2]); + be_store_word16(output_right + 6, fstate.S[3]); +} diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny-avr.S b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny-avr.S new file mode 100644 index 0000000..c7e0b37 --- /dev/null +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny-avr.S @@ -0,0 +1,8880 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 101 + .byte 76 + .byte 106 + .byte 66 + .byte 75 + .byte 99 + .byte 67 + .byte 107 + .byte 85 + .byte 117 + .byte 90 + .byte 122 + .byte 83 + .byte 115 + .byte 91 + .byte 123 + .byte 53 + .byte 140 + .byte 58 + .byte 129 + .byte 137 + .byte 51 + .byte 128 + .byte 59 + .byte 149 + .byte 37 + .byte 152 + .byte 42 + .byte 144 + .byte 35 + .byte 153 + .byte 43 + .byte 229 + .byte 204 + .byte 232 + .byte 193 + .byte 201 + .byte 224 + .byte 192 + .byte 233 + .byte 213 + .byte 245 + .byte 216 + .byte 248 + .byte 208 + .byte 240 + .byte 217 + .byte 249 + .byte 165 + .byte 28 + .byte 168 + .byte 18 + .byte 27 + .byte 160 + .byte 19 + .byte 169 + .byte 5 + .byte 181 + .byte 10 + .byte 184 + .byte 3 + .byte 176 + .byte 11 + .byte 185 + .byte 50 + .byte 136 + .byte 60 + .byte 133 + .byte 141 + .byte 52 + .byte 132 + .byte 61 + .byte 145 + .byte 34 + .byte 156 + .byte 44 + .byte 148 + .byte 36 + .byte 157 + .byte 45 + .byte 98 + .byte 74 + .byte 108 + .byte 69 + .byte 77 + 
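table_0 below is the 8-bit SKINNY-128 S-box (it opens 0x65, 0x4c, 0x6a, ...), stored in program memory so each substitution costs a single lpm/elpm; table_1 further down is its inverse, e.g. table_1[0x65] == 0x00. A C-level sketch of the lookup the assembly open-codes, assuming avr-libc's pgm_read_byte:

    #include <avr/pgmspace.h>

    extern const uint8_t table_0[256] PROGMEM;  /* defined in the .S file */

    static inline uint8_t skinny128_sbox_lookup(uint8_t x)
    {
        return pgm_read_byte(&table_0[x]);      /* lpm/elpm under the hood */
    }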
.byte 100 + .byte 68 + .byte 109 + .byte 82 + .byte 114 + .byte 92 + .byte 124 + .byte 84 + .byte 116 + .byte 93 + .byte 125 + .byte 161 + .byte 26 + .byte 172 + .byte 21 + .byte 29 + .byte 164 + .byte 20 + .byte 173 + .byte 2 + .byte 177 + .byte 12 + .byte 188 + .byte 4 + .byte 180 + .byte 13 + .byte 189 + .byte 225 + .byte 200 + .byte 236 + .byte 197 + .byte 205 + .byte 228 + .byte 196 + .byte 237 + .byte 209 + .byte 241 + .byte 220 + .byte 252 + .byte 212 + .byte 244 + .byte 221 + .byte 253 + .byte 54 + .byte 142 + .byte 56 + .byte 130 + .byte 139 + .byte 48 + .byte 131 + .byte 57 + .byte 150 + .byte 38 + .byte 154 + .byte 40 + .byte 147 + .byte 32 + .byte 155 + .byte 41 + .byte 102 + .byte 78 + .byte 104 + .byte 65 + .byte 73 + .byte 96 + .byte 64 + .byte 105 + .byte 86 + .byte 118 + .byte 88 + .byte 120 + .byte 80 + .byte 112 + .byte 89 + .byte 121 + .byte 166 + .byte 30 + .byte 170 + .byte 17 + .byte 25 + .byte 163 + .byte 16 + .byte 171 + .byte 6 + .byte 182 + .byte 8 + .byte 186 + .byte 0 + .byte 179 + .byte 9 + .byte 187 + .byte 230 + .byte 206 + .byte 234 + .byte 194 + .byte 203 + .byte 227 + .byte 195 + .byte 235 + .byte 214 + .byte 246 + .byte 218 + .byte 250 + .byte 211 + .byte 243 + .byte 219 + .byte 251 + .byte 49 + .byte 138 + .byte 62 + .byte 134 + .byte 143 + .byte 55 + .byte 135 + .byte 63 + .byte 146 + .byte 33 + .byte 158 + .byte 46 + .byte 151 + .byte 39 + .byte 159 + .byte 47 + .byte 97 + .byte 72 + .byte 110 + .byte 70 + .byte 79 + .byte 103 + .byte 71 + .byte 111 + .byte 81 + .byte 113 + .byte 94 + .byte 126 + .byte 87 + .byte 119 + .byte 95 + .byte 127 + .byte 162 + .byte 24 + .byte 174 + .byte 22 + .byte 31 + .byte 167 + .byte 23 + .byte 175 + .byte 1 + .byte 178 + .byte 14 + .byte 190 + .byte 7 + .byte 183 + .byte 15 + .byte 191 + .byte 226 + .byte 202 + .byte 238 + .byte 198 + .byte 207 + .byte 231 + .byte 199 + .byte 239 + .byte 210 + .byte 242 + .byte 222 + .byte 254 + .byte 215 + .byte 247 + .byte 223 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 256 +table_1: + .byte 172 + .byte 232 + .byte 104 + .byte 60 + .byte 108 + .byte 56 + .byte 168 + .byte 236 + .byte 170 + .byte 174 + .byte 58 + .byte 62 + .byte 106 + .byte 110 + .byte 234 + .byte 238 + .byte 166 + .byte 163 + .byte 51 + .byte 54 + .byte 102 + .byte 99 + .byte 227 + .byte 230 + .byte 225 + .byte 164 + .byte 97 + .byte 52 + .byte 49 + .byte 100 + .byte 161 + .byte 228 + .byte 141 + .byte 201 + .byte 73 + .byte 29 + .byte 77 + .byte 25 + .byte 137 + .byte 205 + .byte 139 + .byte 143 + .byte 27 + .byte 31 + .byte 75 + .byte 79 + .byte 203 + .byte 207 + .byte 133 + .byte 192 + .byte 64 + .byte 21 + .byte 69 + .byte 16 + .byte 128 + .byte 197 + .byte 130 + .byte 135 + .byte 18 + .byte 23 + .byte 66 + .byte 71 + .byte 194 + .byte 199 + .byte 150 + .byte 147 + .byte 3 + .byte 6 + .byte 86 + .byte 83 + .byte 211 + .byte 214 + .byte 209 + .byte 148 + .byte 81 + .byte 4 + .byte 1 + .byte 84 + .byte 145 + .byte 212 + .byte 156 + .byte 216 + .byte 88 + .byte 12 + .byte 92 + .byte 8 + .byte 152 + .byte 220 + .byte 154 + .byte 158 + .byte 10 + .byte 14 + .byte 90 + .byte 94 + .byte 218 + .byte 222 + .byte 149 + .byte 208 + .byte 80 + .byte 5 + .byte 85 + .byte 0 + .byte 144 + .byte 213 + .byte 146 + .byte 151 + .byte 2 + .byte 7 + .byte 82 + .byte 87 + .byte 210 + .byte 215 + .byte 157 + .byte 217 + .byte 89 + .byte 13 + .byte 93 + .byte 9 + .byte 153 + .byte 221 + .byte 155 + .byte 159 + .byte 11 + .byte 15 + .byte 91 + .byte 95 + .byte 219 + .byte 
223 + .byte 22 + .byte 19 + .byte 131 + .byte 134 + .byte 70 + .byte 67 + .byte 195 + .byte 198 + .byte 65 + .byte 20 + .byte 193 + .byte 132 + .byte 17 + .byte 68 + .byte 129 + .byte 196 + .byte 28 + .byte 72 + .byte 200 + .byte 140 + .byte 76 + .byte 24 + .byte 136 + .byte 204 + .byte 26 + .byte 30 + .byte 138 + .byte 142 + .byte 74 + .byte 78 + .byte 202 + .byte 206 + .byte 53 + .byte 96 + .byte 224 + .byte 165 + .byte 101 + .byte 48 + .byte 160 + .byte 229 + .byte 50 + .byte 55 + .byte 162 + .byte 167 + .byte 98 + .byte 103 + .byte 226 + .byte 231 + .byte 61 + .byte 105 + .byte 233 + .byte 173 + .byte 109 + .byte 57 + .byte 169 + .byte 237 + .byte 59 + .byte 63 + .byte 171 + .byte 175 + .byte 107 + .byte 111 + .byte 235 + .byte 239 + .byte 38 + .byte 35 + .byte 179 + .byte 182 + .byte 118 + .byte 115 + .byte 243 + .byte 246 + .byte 113 + .byte 36 + .byte 241 + .byte 180 + .byte 33 + .byte 116 + .byte 177 + .byte 244 + .byte 44 + .byte 120 + .byte 248 + .byte 188 + .byte 124 + .byte 40 + .byte 184 + .byte 252 + .byte 42 + .byte 46 + .byte 186 + .byte 190 + .byte 122 + .byte 126 + .byte 250 + .byte 254 + .byte 37 + .byte 112 + .byte 240 + .byte 181 + .byte 117 + .byte 32 + .byte 176 + .byte 245 + .byte 34 + .byte 39 + .byte 178 + .byte 183 + .byte 114 + .byte 119 + .byte 242 + .byte 247 + .byte 45 + .byte 121 + .byte 249 + .byte 189 + .byte 125 + .byte 41 + .byte 185 + .byte 253 + .byte 43 + .byte 47 + .byte 187 + .byte 191 + .byte 123 + .byte 127 + .byte 251 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_2, @object + .size table_2, 256 +table_2: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 80 + .byte 82 + .byte 
84 + .byte 86 + .byte 88 + .byte 90 + .byte 92 + .byte 94 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_3, @object + .size table_3, 256 +table_3: + .byte 0 + .byte 128 + .byte 1 + .byte 129 + .byte 2 + .byte 130 + .byte 3 + .byte 131 + .byte 4 + .byte 132 + .byte 5 + .byte 133 + .byte 6 + .byte 134 + .byte 7 + .byte 135 + .byte 8 + .byte 136 + .byte 9 + .byte 137 + .byte 10 + .byte 138 + .byte 11 + .byte 139 + .byte 12 + .byte 140 + .byte 13 + .byte 141 + .byte 14 + .byte 142 + .byte 15 + .byte 143 + .byte 16 + .byte 144 + .byte 17 + .byte 145 + .byte 18 + .byte 146 + .byte 19 + .byte 147 + .byte 20 + .byte 148 + .byte 21 + .byte 149 + .byte 22 + .byte 150 + .byte 23 + .byte 151 + .byte 24 + .byte 152 + .byte 25 + .byte 153 + .byte 26 + .byte 154 + .byte 27 + .byte 155 + .byte 28 + .byte 156 + .byte 29 + .byte 157 + .byte 30 + .byte 158 + .byte 31 + .byte 159 + .byte 160 + .byte 32 + .byte 161 + .byte 33 + .byte 162 + .byte 34 + .byte 163 + .byte 35 + .byte 164 + .byte 36 + .byte 165 + .byte 37 + .byte 166 + .byte 38 + .byte 167 + .byte 39 + .byte 168 + .byte 40 + .byte 169 + .byte 41 + .byte 170 + .byte 42 + .byte 171 + .byte 43 + .byte 172 + .byte 44 + .byte 173 + .byte 45 + .byte 174 + .byte 46 + .byte 175 + .byte 47 + .byte 176 + .byte 48 + .byte 177 + .byte 49 + .byte 178 + .byte 50 + .byte 179 + .byte 51 + .byte 180 + .byte 52 + .byte 181 + .byte 53 + .byte 182 + .byte 54 + .byte 183 + .byte 55 + .byte 184 + .byte 56 + .byte 185 + .byte 57 + .byte 186 + .byte 58 + .byte 187 + .byte 59 + .byte 188 + .byte 60 + .byte 189 + .byte 61 + .byte 190 + .byte 62 + .byte 191 + .byte 63 + .byte 64 + .byte 192 + .byte 65 + .byte 193 + .byte 66 + .byte 194 + .byte 67 + .byte 195 + .byte 68 + .byte 196 + .byte 69 + .byte 197 + .byte 70 + .byte 198 + .byte 71 + .byte 199 + .byte 72 + .byte 200 + .byte 73 + .byte 201 + .byte 74 + .byte 202 + .byte 75 + .byte 203 + .byte 76 + .byte 204 + .byte 77 + .byte 205 + .byte 78 + .byte 206 + .byte 79 + .byte 207 + .byte 80 + .byte 208 + .byte 81 + .byte 209 + .byte 82 + .byte 210 + .byte 83 + .byte 211 + .byte 84 + .byte 212 + .byte 85 + .byte 213 + .byte 86 + .byte 214 + .byte 87 + .byte 215 + .byte 88 + .byte 216 + .byte 89 + .byte 217 + .byte 90 + .byte 218 + .byte 91 + .byte 219 + .byte 92 + .byte 220 + .byte 93 + .byte 221 + .byte 94 + .byte 222 + .byte 95 + .byte 223 + .byte 224 + .byte 96 + .byte 225 + .byte 97 + .byte 226 + .byte 98 + .byte 227 + .byte 99 + .byte 228 + .byte 100 + .byte 229 + .byte 101 + .byte 230 + .byte 102 + .byte 231 + .byte 103 + .byte 232 + .byte 104 + .byte 233 + 
.byte 105 + .byte 234 + .byte 106 + .byte 235 + .byte 107 + .byte 236 + .byte 108 + .byte 237 + .byte 109 + .byte 238 + .byte 110 + .byte 239 + .byte 111 + .byte 240 + .byte 112 + .byte 241 + .byte 113 + .byte 242 + .byte 114 + .byte 243 + .byte 115 + .byte 244 + .byte 116 + .byte 245 + .byte 117 + .byte 246 + .byte 118 + .byte 247 + .byte 119 + .byte 248 + .byte 120 + .byte 249 + .byte 121 + .byte 250 + .byte 122 + .byte 251 + .byte 123 + .byte 252 + .byte 124 + .byte 253 + .byte 125 + .byte 254 + .byte 126 + .byte 255 + .byte 127 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_4, @object + .size table_4, 174 +table_4: + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 7 + .byte 0 + .byte 15 + .byte 0 + .byte 15 + .byte 1 + .byte 15 + .byte 3 + .byte 14 + .byte 7 + .byte 13 + .byte 7 + .byte 11 + .byte 7 + .byte 7 + .byte 7 + .byte 15 + .byte 6 + .byte 15 + .byte 5 + .byte 14 + .byte 3 + .byte 12 + .byte 7 + .byte 9 + .byte 7 + .byte 3 + .byte 7 + .byte 7 + .byte 6 + .byte 15 + .byte 4 + .byte 14 + .byte 1 + .byte 13 + .byte 3 + .byte 10 + .byte 7 + .byte 5 + .byte 7 + .byte 11 + .byte 6 + .byte 7 + .byte 5 + .byte 14 + .byte 2 + .byte 12 + .byte 5 + .byte 8 + .byte 3 + .byte 0 + .byte 7 + .byte 1 + .byte 6 + .byte 3 + .byte 4 + .byte 6 + .byte 0 + .byte 13 + .byte 0 + .byte 11 + .byte 1 + .byte 7 + .byte 3 + .byte 14 + .byte 6 + .byte 13 + .byte 5 + .byte 10 + .byte 3 + .byte 4 + .byte 7 + .byte 9 + .byte 6 + .byte 3 + .byte 5 + .byte 6 + .byte 2 + .byte 12 + .byte 4 + .byte 8 + .byte 1 + .byte 1 + .byte 3 + .byte 2 + .byte 6 + .byte 5 + .byte 4 + .byte 10 + .byte 0 + .byte 5 + .byte 1 + .byte 11 + .byte 2 + .byte 6 + .byte 5 + .byte 12 + .byte 2 + .byte 8 + .byte 5 + .byte 0 + .byte 3 + .byte 0 + .byte 6 + .byte 1 + .byte 4 + .byte 2 + .byte 0 + .byte 5 + .byte 0 + .byte 11 + .byte 0 + .byte 7 + .byte 1 + .byte 15 + .byte 2 + .byte 14 + .byte 5 + .byte 12 + .byte 3 + .byte 8 + .byte 7 + .byte 1 + .byte 7 + .byte 3 + .byte 6 + .byte 7 + .byte 4 + .byte 14 + .byte 0 + .byte 13 + .byte 1 + .byte 11 + .byte 3 + .byte 6 + .byte 7 + .byte 13 + .byte 6 + .byte 11 + .byte 5 + .byte 6 + .byte 3 + .byte 12 + .byte 6 + .byte 9 + .byte 5 + .byte 2 + .byte 3 + .byte 4 + .byte 6 + .byte 9 + .byte 4 + .byte 2 + .byte 1 + .byte 5 + .byte 2 + .byte 10 + .byte 4 + .byte 4 + .byte 1 + .byte 9 + .byte 2 + .byte 2 + .byte 5 + .byte 4 + .byte 2 + .byte 8 + .byte 4 + .byte 0 + .byte 1 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_5, @object + .size table_5, 256 +table_5: + .byte 204 + .byte 198 + .byte 201 + .byte 192 + .byte 193 + .byte 202 + .byte 194 + .byte 203 + .byte 195 + .byte 200 + .byte 197 + .byte 205 + .byte 196 + .byte 206 + .byte 199 + .byte 207 + .byte 108 + .byte 102 + .byte 105 + .byte 96 + .byte 97 + .byte 106 + .byte 98 + .byte 107 + .byte 99 + .byte 104 + .byte 101 + .byte 109 + .byte 100 + .byte 110 + .byte 103 + .byte 111 + .byte 156 + .byte 150 + .byte 153 + .byte 144 + .byte 145 + .byte 154 + .byte 146 + .byte 155 + .byte 147 + .byte 152 + .byte 149 + .byte 157 + .byte 148 + .byte 158 + .byte 151 + .byte 159 + .byte 12 + .byte 6 + .byte 9 + .byte 0 + .byte 1 + .byte 10 + .byte 2 + .byte 11 + .byte 3 + .byte 8 + .byte 5 + .byte 13 + .byte 4 + .byte 14 + .byte 7 + .byte 15 + .byte 28 + .byte 22 + .byte 25 + .byte 16 + .byte 17 + .byte 26 + .byte 18 + .byte 27 + .byte 19 + .byte 24 + .byte 21 + .byte 29 + .byte 20 + .byte 30 + .byte 23 + .byte 31 + .byte 172 + .byte 166 + .byte 169 + .byte 160 + .byte 161 + .byte 170 + .byte 162 + .byte 171 + .byte 
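table_4's 174 bytes are 87 (low nibble, high bits) pairs, one per round, covering the deepest schedule in the family: 25 + 31 * 2 = 87 rounds for ForkSkinny-128-384. Recombining the pairs gives the stream 1, 3, 7, 15, 31, 63, 126, 125, ..., which is consistent with a 7-bit LFSR that shifts left and inserts 1 ^ rc6 ^ rc5 at bit 0. A generator that reproduces the table under that assumption:

    /* Regenerates table_4: 87 round constants, each stored as
     * .byte (rc & 0x0F) followed by .byte (rc >> 4). */
    #include <stdio.h>

    int main(void)
    {
        unsigned rc = 0, round;
        for (round = 0; round < 87; ++round) {
            rc = ((rc << 1) & 0x7F) |
                 (1U ^ ((rc >> 6) & 1U) ^ ((rc >> 5) & 1U));
            printf("\t.byte\t%u\n\t.byte\t%u\n", rc & 0x0F, rc >> 4);
        }
        return 0;
    }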
163 + .byte 168 + .byte 165 + .byte 173 + .byte 164 + .byte 174 + .byte 167 + .byte 175 + .byte 44 + .byte 38 + .byte 41 + .byte 32 + .byte 33 + .byte 42 + .byte 34 + .byte 43 + .byte 35 + .byte 40 + .byte 37 + .byte 45 + .byte 36 + .byte 46 + .byte 39 + .byte 47 + .byte 188 + .byte 182 + .byte 185 + .byte 176 + .byte 177 + .byte 186 + .byte 178 + .byte 187 + .byte 179 + .byte 184 + .byte 181 + .byte 189 + .byte 180 + .byte 190 + .byte 183 + .byte 191 + .byte 60 + .byte 54 + .byte 57 + .byte 48 + .byte 49 + .byte 58 + .byte 50 + .byte 59 + .byte 51 + .byte 56 + .byte 53 + .byte 61 + .byte 52 + .byte 62 + .byte 55 + .byte 63 + .byte 140 + .byte 134 + .byte 137 + .byte 128 + .byte 129 + .byte 138 + .byte 130 + .byte 139 + .byte 131 + .byte 136 + .byte 133 + .byte 141 + .byte 132 + .byte 142 + .byte 135 + .byte 143 + .byte 92 + .byte 86 + .byte 89 + .byte 80 + .byte 81 + .byte 90 + .byte 82 + .byte 91 + .byte 83 + .byte 88 + .byte 85 + .byte 93 + .byte 84 + .byte 94 + .byte 87 + .byte 95 + .byte 220 + .byte 214 + .byte 217 + .byte 208 + .byte 209 + .byte 218 + .byte 210 + .byte 219 + .byte 211 + .byte 216 + .byte 213 + .byte 221 + .byte 212 + .byte 222 + .byte 215 + .byte 223 + .byte 76 + .byte 70 + .byte 73 + .byte 64 + .byte 65 + .byte 74 + .byte 66 + .byte 75 + .byte 67 + .byte 72 + .byte 69 + .byte 77 + .byte 68 + .byte 78 + .byte 71 + .byte 79 + .byte 236 + .byte 230 + .byte 233 + .byte 224 + .byte 225 + .byte 234 + .byte 226 + .byte 235 + .byte 227 + .byte 232 + .byte 229 + .byte 237 + .byte 228 + .byte 238 + .byte 231 + .byte 239 + .byte 124 + .byte 118 + .byte 121 + .byte 112 + .byte 113 + .byte 122 + .byte 114 + .byte 123 + .byte 115 + .byte 120 + .byte 117 + .byte 125 + .byte 116 + .byte 126 + .byte 119 + .byte 127 + .byte 252 + .byte 246 + .byte 249 + .byte 240 + .byte 241 + .byte 250 + .byte 242 + .byte 251 + .byte 243 + .byte 248 + .byte 245 + .byte 253 + .byte 244 + .byte 254 + .byte 247 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_6, @object + .size table_6, 256 +table_6: + .byte 51 + .byte 52 + .byte 54 + .byte 56 + .byte 60 + .byte 58 + .byte 49 + .byte 62 + .byte 57 + .byte 50 + .byte 53 + .byte 55 + .byte 48 + .byte 59 + .byte 61 + .byte 63 + .byte 67 + .byte 68 + .byte 70 + .byte 72 + .byte 76 + .byte 74 + .byte 65 + .byte 78 + .byte 73 + .byte 66 + .byte 69 + .byte 71 + .byte 64 + .byte 75 + .byte 77 + .byte 79 + .byte 99 + .byte 100 + .byte 102 + .byte 104 + .byte 108 + .byte 106 + .byte 97 + .byte 110 + .byte 105 + .byte 98 + .byte 101 + .byte 103 + .byte 96 + .byte 107 + .byte 109 + .byte 111 + .byte 131 + .byte 132 + .byte 134 + .byte 136 + .byte 140 + .byte 138 + .byte 129 + .byte 142 + .byte 137 + .byte 130 + .byte 133 + .byte 135 + .byte 128 + .byte 139 + .byte 141 + .byte 143 + .byte 195 + .byte 196 + .byte 198 + .byte 200 + .byte 204 + .byte 202 + .byte 193 + .byte 206 + .byte 201 + .byte 194 + .byte 197 + .byte 199 + .byte 192 + .byte 203 + .byte 205 + .byte 207 + .byte 163 + .byte 164 + .byte 166 + .byte 168 + .byte 172 + .byte 170 + .byte 161 + .byte 174 + .byte 169 + .byte 162 + .byte 165 + .byte 167 + .byte 160 + .byte 171 + .byte 173 + .byte 175 + .byte 19 + .byte 20 + .byte 22 + .byte 24 + .byte 28 + .byte 26 + .byte 17 + .byte 30 + .byte 25 + .byte 18 + .byte 21 + .byte 23 + .byte 16 + .byte 27 + .byte 29 + .byte 31 + .byte 227 + .byte 228 + .byte 230 + .byte 232 + .byte 236 + .byte 234 + .byte 225 + .byte 238 + .byte 233 + .byte 226 + .byte 229 + .byte 231 + .byte 224 + .byte 235 + .byte 237 + .byte 239 + .byte 147 + 
.byte 148 + .byte 150 + .byte 152 + .byte 156 + .byte 154 + .byte 145 + .byte 158 + .byte 153 + .byte 146 + .byte 149 + .byte 151 + .byte 144 + .byte 155 + .byte 157 + .byte 159 + .byte 35 + .byte 36 + .byte 38 + .byte 40 + .byte 44 + .byte 42 + .byte 33 + .byte 46 + .byte 41 + .byte 34 + .byte 37 + .byte 39 + .byte 32 + .byte 43 + .byte 45 + .byte 47 + .byte 83 + .byte 84 + .byte 86 + .byte 88 + .byte 92 + .byte 90 + .byte 81 + .byte 94 + .byte 89 + .byte 82 + .byte 85 + .byte 87 + .byte 80 + .byte 91 + .byte 93 + .byte 95 + .byte 115 + .byte 116 + .byte 118 + .byte 120 + .byte 124 + .byte 122 + .byte 113 + .byte 126 + .byte 121 + .byte 114 + .byte 117 + .byte 119 + .byte 112 + .byte 123 + .byte 125 + .byte 127 + .byte 3 + .byte 4 + .byte 6 + .byte 8 + .byte 12 + .byte 10 + .byte 1 + .byte 14 + .byte 9 + .byte 2 + .byte 5 + .byte 7 + .byte 0 + .byte 11 + .byte 13 + .byte 15 + .byte 179 + .byte 180 + .byte 182 + .byte 184 + .byte 188 + .byte 186 + .byte 177 + .byte 190 + .byte 185 + .byte 178 + .byte 181 + .byte 183 + .byte 176 + .byte 187 + .byte 189 + .byte 191 + .byte 211 + .byte 212 + .byte 214 + .byte 216 + .byte 220 + .byte 218 + .byte 209 + .byte 222 + .byte 217 + .byte 210 + .byte 213 + .byte 215 + .byte 208 + .byte 219 + .byte 221 + .byte 223 + .byte 243 + .byte 244 + .byte 246 + .byte 248 + .byte 252 + .byte 250 + .byte 241 + .byte 254 + .byte 249 + .byte 242 + .byte 245 + .byte 247 + .byte 240 + .byte 251 + .byte 253 + .byte 255 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_7, @object + .size table_7, 256 +table_7: + .byte 0 + .byte 2 + .byte 4 + .byte 6 + .byte 9 + .byte 11 + .byte 13 + .byte 15 + .byte 1 + .byte 3 + .byte 5 + .byte 7 + .byte 8 + .byte 10 + .byte 12 + .byte 14 + .byte 32 + .byte 34 + .byte 36 + .byte 38 + .byte 41 + .byte 43 + .byte 45 + .byte 47 + .byte 33 + .byte 35 + .byte 37 + .byte 39 + .byte 40 + .byte 42 + .byte 44 + .byte 46 + .byte 64 + .byte 66 + .byte 68 + .byte 70 + .byte 73 + .byte 75 + .byte 77 + .byte 79 + .byte 65 + .byte 67 + .byte 69 + .byte 71 + .byte 72 + .byte 74 + .byte 76 + .byte 78 + .byte 96 + .byte 98 + .byte 100 + .byte 102 + .byte 105 + .byte 107 + .byte 109 + .byte 111 + .byte 97 + .byte 99 + .byte 101 + .byte 103 + .byte 104 + .byte 106 + .byte 108 + .byte 110 + .byte 144 + .byte 146 + .byte 148 + .byte 150 + .byte 153 + .byte 155 + .byte 157 + .byte 159 + .byte 145 + .byte 147 + .byte 149 + .byte 151 + .byte 152 + .byte 154 + .byte 156 + .byte 158 + .byte 176 + .byte 178 + .byte 180 + .byte 182 + .byte 185 + .byte 187 + .byte 189 + .byte 191 + .byte 177 + .byte 179 + .byte 181 + .byte 183 + .byte 184 + .byte 186 + .byte 188 + .byte 190 + .byte 208 + .byte 210 + .byte 212 + .byte 214 + .byte 217 + .byte 219 + .byte 221 + .byte 223 + .byte 209 + .byte 211 + .byte 213 + .byte 215 + .byte 216 + .byte 218 + .byte 220 + .byte 222 + .byte 240 + .byte 242 + .byte 244 + .byte 246 + .byte 249 + .byte 251 + .byte 253 + .byte 255 + .byte 241 + .byte 243 + .byte 245 + .byte 247 + .byte 248 + .byte 250 + .byte 252 + .byte 254 + .byte 16 + .byte 18 + .byte 20 + .byte 22 + .byte 25 + .byte 27 + .byte 29 + .byte 31 + .byte 17 + .byte 19 + .byte 21 + .byte 23 + .byte 24 + .byte 26 + .byte 28 + .byte 30 + .byte 48 + .byte 50 + .byte 52 + .byte 54 + .byte 57 + .byte 59 + .byte 61 + .byte 63 + .byte 49 + .byte 51 + .byte 53 + .byte 55 + .byte 56 + .byte 58 + .byte 60 + .byte 62 + .byte 80 + .byte 82 + .byte 84 + .byte 86 + .byte 89 + .byte 91 + .byte 93 + .byte 95 + .byte 81 + .byte 83 + .byte 85 + .byte 87 + .byte 88 + .byte 90 
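Tables 5 through 8 process two SKINNY-64 cells at a time: table_5 and table_6 apply the 4-bit S-box and its inverse to both nibbles of a byte, while table_7 and table_8 do the same for the TK2 and TK3 LFSRs, so the AVR code clocks two cells per flash lookup instead of bit-twiddling nibbles. The invariant the LFSR tables satisfy, written out in C:

    /* 4-bit LFSR2 from the SKINNY specification:
     * (x3 x2 x1 x0) -> (x2 x1 x0, x3 ^ x2). */
    static uint8_t lfsr2_4(uint8_t x)
    {
        return (uint8_t)(((x << 1) & 0x0E) | (((x >> 3) ^ (x >> 2)) & 1));
    }

    /* For every byte b:
     *   table_7[b] == (lfsr2_4(b >> 4) << 4) | lfsr2_4(b & 0x0F)
     * and table_8 satisfies the same relation for LFSR3. */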
+ .byte 92 + .byte 94 + .byte 112 + .byte 114 + .byte 116 + .byte 118 + .byte 121 + .byte 123 + .byte 125 + .byte 127 + .byte 113 + .byte 115 + .byte 117 + .byte 119 + .byte 120 + .byte 122 + .byte 124 + .byte 126 + .byte 128 + .byte 130 + .byte 132 + .byte 134 + .byte 137 + .byte 139 + .byte 141 + .byte 143 + .byte 129 + .byte 131 + .byte 133 + .byte 135 + .byte 136 + .byte 138 + .byte 140 + .byte 142 + .byte 160 + .byte 162 + .byte 164 + .byte 166 + .byte 169 + .byte 171 + .byte 173 + .byte 175 + .byte 161 + .byte 163 + .byte 165 + .byte 167 + .byte 168 + .byte 170 + .byte 172 + .byte 174 + .byte 192 + .byte 194 + .byte 196 + .byte 198 + .byte 201 + .byte 203 + .byte 205 + .byte 207 + .byte 193 + .byte 195 + .byte 197 + .byte 199 + .byte 200 + .byte 202 + .byte 204 + .byte 206 + .byte 224 + .byte 226 + .byte 228 + .byte 230 + .byte 233 + .byte 235 + .byte 237 + .byte 239 + .byte 225 + .byte 227 + .byte 229 + .byte 231 + .byte 232 + .byte 234 + .byte 236 + .byte 238 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_8, @object + .size table_8, 256 +table_8: + .byte 0 + .byte 8 + .byte 1 + .byte 9 + .byte 2 + .byte 10 + .byte 3 + .byte 11 + .byte 12 + .byte 4 + .byte 13 + .byte 5 + .byte 14 + .byte 6 + .byte 15 + .byte 7 + .byte 128 + .byte 136 + .byte 129 + .byte 137 + .byte 130 + .byte 138 + .byte 131 + .byte 139 + .byte 140 + .byte 132 + .byte 141 + .byte 133 + .byte 142 + .byte 134 + .byte 143 + .byte 135 + .byte 16 + .byte 24 + .byte 17 + .byte 25 + .byte 18 + .byte 26 + .byte 19 + .byte 27 + .byte 28 + .byte 20 + .byte 29 + .byte 21 + .byte 30 + .byte 22 + .byte 31 + .byte 23 + .byte 144 + .byte 152 + .byte 145 + .byte 153 + .byte 146 + .byte 154 + .byte 147 + .byte 155 + .byte 156 + .byte 148 + .byte 157 + .byte 149 + .byte 158 + .byte 150 + .byte 159 + .byte 151 + .byte 32 + .byte 40 + .byte 33 + .byte 41 + .byte 34 + .byte 42 + .byte 35 + .byte 43 + .byte 44 + .byte 36 + .byte 45 + .byte 37 + .byte 46 + .byte 38 + .byte 47 + .byte 39 + .byte 160 + .byte 168 + .byte 161 + .byte 169 + .byte 162 + .byte 170 + .byte 163 + .byte 171 + .byte 172 + .byte 164 + .byte 173 + .byte 165 + .byte 174 + .byte 166 + .byte 175 + .byte 167 + .byte 48 + .byte 56 + .byte 49 + .byte 57 + .byte 50 + .byte 58 + .byte 51 + .byte 59 + .byte 60 + .byte 52 + .byte 61 + .byte 53 + .byte 62 + .byte 54 + .byte 63 + .byte 55 + .byte 176 + .byte 184 + .byte 177 + .byte 185 + .byte 178 + .byte 186 + .byte 179 + .byte 187 + .byte 188 + .byte 180 + .byte 189 + .byte 181 + .byte 190 + .byte 182 + .byte 191 + .byte 183 + .byte 192 + .byte 200 + .byte 193 + .byte 201 + .byte 194 + .byte 202 + .byte 195 + .byte 203 + .byte 204 + .byte 196 + .byte 205 + .byte 197 + .byte 206 + .byte 198 + .byte 207 + .byte 199 + .byte 64 + .byte 72 + .byte 65 + .byte 73 + .byte 66 + .byte 74 + .byte 67 + .byte 75 + .byte 76 + .byte 68 + .byte 77 + .byte 69 + .byte 78 + .byte 70 + .byte 79 + .byte 71 + .byte 208 + .byte 216 + .byte 209 + .byte 217 + .byte 210 + .byte 218 + .byte 211 + .byte 219 + .byte 220 + .byte 212 + .byte 221 + .byte 213 + .byte 222 + .byte 214 + .byte 223 + .byte 215 + .byte 80 + .byte 88 + .byte 81 + .byte 89 + .byte 82 + .byte 90 + .byte 83 + .byte 91 + .byte 92 + .byte 84 + .byte 93 + .byte 85 + .byte 94 + .byte 86 + .byte 95 + .byte 87 + .byte 224 + .byte 232 + .byte 225 + .byte 233 + .byte 226 + .byte 234 + .byte 227 + .byte 235 + .byte 236 + .byte 228 + .byte 237 + .byte 229 + .byte 238 + .byte 230 + .byte 239 + .byte 231 + .byte 96 + .byte 104 + .byte 97 + .byte 105 + .byte 98 + .byte 106 + 
.byte 99 + .byte 107 + .byte 108 + .byte 100 + .byte 109 + .byte 101 + .byte 110 + .byte 102 + .byte 111 + .byte 103 + .byte 240 + .byte 248 + .byte 241 + .byte 249 + .byte 242 + .byte 250 + .byte 243 + .byte 251 + .byte 252 + .byte 244 + .byte 253 + .byte 245 + .byte 254 + .byte 246 + .byte 255 + .byte 247 + .byte 112 + .byte 120 + .byte 113 + .byte 121 + .byte 114 + .byte 122 + .byte 115 + .byte 123 + .byte 124 + .byte 116 + .byte 125 + .byte 117 + .byte 126 + .byte 118 + .byte 127 + .byte 119 + + .text +.global forkskinny_128_256_rounds + .type forkskinny_128_256_rounds, @function +forkskinny_128_256_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + 
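The two table_4 bytes fetched above land in the low bytes of rows 0 and 1 (the eor r2 and eor r6 instructions), and the literal 2 is XORed into the low byte of row 2 and byte 2 of row 0 (eor r10, eor r4). Against the little-endian row words of the C implementation this is the following constant addition, with rc_low and rc_high as illustrative names for the two fetched bytes:

    /* Constant addition performed by the eor sequence above. */
    s0 ^= rc_low;          /* rc3..rc0 into cell 0            */
    s1 ^= rc_high;         /* rc6..rc4 into cell 4            */
    s0 ^= 0x00020000U;     /* fixed 0x02 into cell 2 of row 0 */
    s2 ^= 0x02U;           /* fixed 0x02 into cell 8          */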
ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_rounds, .-forkskinny_128_256_rounds + + .text +.global 
forkskinny_128_256_inv_rounds + .type forkskinny_128_256_inv_rounds, @function +forkskinny_128_256_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 51 + ldd r2,Z+32 + ldd r3,Z+33 + ldd r4,Z+34 + ldd r5,Z+35 + ldd r6,Z+36 + ldd r7,Z+37 + ldd r8,Z+38 + ldd r9,Z+39 + ldd r10,Z+40 + ldd r11,Z+41 + ldd r12,Z+42 + ldd r13,Z+43 + ldd r14,Z+44 + ldd r15,Z+45 + ldd r24,Z+46 + ldd r25,Z+47 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + lsl r20 + std Y+33,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +86: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm 
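forkskinny_128_256_inv_rounds mirrors the forward loop with the complementary tables: table_1 inverts the table_0 S-box, and table_3 undoes table_2 because the two SKINNY-128 tweakey LFSRs are mutual inverses (applying LFSR3 after LFSR2 shifts the feedback bit straight back out). A self-check of both pairings, reusing the pgm_read_byte sketch from above:

    /* Both table pairings must round-trip over all 256 byte values. */
    int ok = 1;
    unsigned x;
    for (x = 0; x < 256; ++x) {
        ok &= pgm_read_byte(&table_1[pgm_read_byte(&table_0[x])]) == x;
        ok &= pgm_read_byte(&table_3[pgm_read_byte(&table_2[x])]) == x;
    }
    /* ok == 1 iff table_1 inverts table_0 and table_3 inverts table_2 */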
+ mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif 
defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + cp r22,r18 + breq 5259f + rjmp 86b +5259: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+32,r2 + std Z+33,r3 + std Z+34,r4 + std Z+35,r5 + std Z+36,r6 + std Z+37,r7 + std Z+38,r8 + std Z+39,r9 + std Z+40,r10 + std Z+41,r11 + std Z+42,r12 + std Z+43,r13 + std Z+44,r14 + std Z+45,r15 + std Z+46,r24 + std Z+47,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + adiw r28,33 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_256_inv_rounds, .-forkskinny_128_256_inv_rounds + + .text +.global forkskinny_128_256_forward_tk + .type forkskinny_128_256_forward_tk, @function +forkskinny_128_256_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + 
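+ ; ForkSkinny-128-256 forward tweakey schedule: the TK1 half is held in
+ ; registers (r4-r17 plus r24/r25) while the TK2 half is staged in the
+ ; stack frame at Y+1..Y+16.  Each pass of the loop below advances the
+ ; schedule one round, permuting both halves and pushing the incoming
+ ; TK2 rows through table_2 (apparently the LFSR2 byte map); r22 holds
+ ; the number of rounds left to fast-forward.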
ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 
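+ ; Last TK2 row: Y+13..Y+16 is about to be written back to Z+28..Z+31,
+ ; completing the copy of the updated schedule into the state structure.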
+ ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_forward_tk, .-forkskinny_128_256_forward_tk + + .text +.global forkskinny_128_256_reverse_tk + .type forkskinny_128_256_reverse_tk, @function +forkskinny_128_256_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 34 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +51: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + dec r22 + breq 5109f + rjmp 51b +5109: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_256_reverse_tk, .-forkskinny_128_256_reverse_tk + + .text +.global forkskinny_128_384_rounds + .type forkskinny_128_384_rounds, @function +forkskinny_128_384_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi 
r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out 
_SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + mov r0,r9 + mov r9,r8 + mov r8,r7 + mov r7,r6 + mov r6,r0 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + mov r0,r14 + mov r14,r15 + mov r15,r24 + mov r24,r25 + mov r25,r0 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + movw r18,r14 + movw r26,r24 + eor r18,r10 + eor r19,r11 + eor r26,r12 + eor r27,r13 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r18 + movw r4,r26 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + ldd r0,Y+1 + std Y+9,r0 + ldd r0,Y+2 + std Y+10,r0 + ldd r0,Y+3 + std Y+11,r0 + ldd r0,Y+4 + std Y+12,r0 + ldd r0,Y+5 + std Y+13,r0 + ldd r0,Y+6 + std Y+14,r0 + ldd r0,Y+7 + std Y+15,r0 + ldd r0,Y+8 + std Y+16,r0 + std Y+1,r19 + std Y+2,r17 + std Y+3,r18 + std Y+4,r21 + std Y+5,r26 + std Y+6,r16 + std Y+7,r20 + std Y+8,r27 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r16,Y+31 + ldd r17,Y+32 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+17 + std Y+25,r0 + ldd r0,Y+18 + std Y+26,r0 + ldd r0,Y+19 + std Y+27,r0 + ldd r0,Y+20 + std Y+28,r0 + ldd r0,Y+21 + std Y+29,r0 + ldd r0,Y+22 + std Y+30,r0 + ldd r0,Y+23 + std Y+31,r0 + ldd r0,Y+24 + std Y+32,r0 + std Y+17,r19 + std Y+18,r17 + std Y+19,r18 + std Y+20,r21 + std Y+21,r26 + std Y+22,r16 + std Y+23,r20 + std Y+24,r27 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + ldd r20,Y+45 + ldd r21,Y+46 + ldd 
r16,Y+47 + ldd r17,Y+48 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+33 + std Y+41,r0 + ldd r0,Y+34 + std Y+42,r0 + ldd r0,Y+35 + std Y+43,r0 + ldd r0,Y+36 + std Y+44,r0 + ldd r0,Y+37 + std Y+45,r0 + ldd r0,Y+38 + std Y+46,r0 + ldd r0,Y+39 + std Y+47,r0 + ldd r0,Y+40 + std Y+48,r0 + std Y+33,r19 + std Y+34,r17 + std Y+35,r18 + std Y+36,r21 + std Y+37,r26 + std Y+38,r16 + std Y+39,r20 + std Y+40,r27 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r18,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd 
r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_rounds, .-forkskinny_128_384_rounds + + .text +.global forkskinny_128_384_inv_rounds + .type forkskinny_128_384_inv_rounds, @function +forkskinny_128_384_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 67 + ldd r2,Z+48 + ldd r3,Z+49 + ldd r4,Z+50 + ldd r5,Z+51 + ldd r6,Z+52 + ldd r7,Z+53 + ldd r8,Z+54 + ldd r9,Z+55 + ldd r10,Z+56 + ldd r11,Z+57 + ldd r12,Z+58 + ldd r13,Z+59 + ldd r14,Z+60 + ldd r15,Z+61 + ldd r24,Z+62 + ldd r25,Z+63 + ld r18,Z + ldd r19,Z+1 + ldd r26,Z+2 + ldd r27,Z+3 + std Y+1,r18 + std Y+2,r19 + std Y+3,r26 + std Y+4,r27 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + std Y+5,r18 + std Y+6,r19 + std Y+7,r26 + std Y+8,r27 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r26,Z+10 + ldd r27,Z+11 + std Y+9,r18 + std Y+10,r19 + std Y+11,r26 + std Y+12,r27 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r26,Z+14 + ldd r27,Z+15 + std Y+13,r18 + std Y+14,r19 + std Y+15,r26 + std Y+16,r27 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + std Y+17,r18 + std Y+18,r19 + std Y+19,r26 + std Y+20,r27 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + std Y+21,r18 + std Y+22,r19 + std Y+23,r26 + std Y+24,r27 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + std Y+25,r18 + std Y+26,r19 + std Y+27,r26 + std Y+28,r27 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + std Y+29,r18 + std Y+30,r19 + std Y+31,r26 + std Y+32,r27 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r26,Z+34 + ldd r27,Z+35 + std Y+33,r18 + std Y+34,r19 + std Y+35,r26 + std Y+36,r27 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + std Y+37,r18 + std Y+38,r19 + std Y+39,r26 + std Y+40,r27 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r26,Z+42 + ldd r27,Z+43 + std Y+41,r18 + std Y+42,r19 + std Y+43,r26 + std Y+44,r27 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r26,Z+46 + ldd r27,Z+47 + std Y+45,r18 + std Y+46,r19 + std Y+47,r26 + std Y+48,r27 + lsl r20 + std Y+49,r20 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 +118: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + ldd r0,Y+9 + std Y+1,r0 + ldd r0,Y+10 + std Y+2,r0 + ldd r0,Y+11 + std Y+3,r0 + ldd r0,Y+12 + std Y+4,r0 + ldd r0,Y+13 + std Y+5,r0 + ldd r0,Y+14 + std Y+6,r0 + ldd r0,Y+15 + std Y+7,r0 + ldd r0,Y+16 + std Y+8,r0 + std Y+9,r26 + std Y+10,r18 + std Y+11,r20 + std Y+12,r17 + std Y+13,r16 + std Y+14,r27 + std Y+15,r21 + std Y+16,r19 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r16,Y+23 + ldd r17,Y+24 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+25 + std Y+17,r0 + ldd r0,Y+26 + std Y+18,r0 + ldd r0,Y+27 + std Y+19,r0 + ldd r0,Y+28 + std Y+20,r0 + ldd r0,Y+29 + std Y+21,r0 + ldd r0,Y+30 + std Y+22,r0 + ldd r0,Y+31 + std Y+23,r0 + ldd r0,Y+32 + std Y+24,r0 + std Y+25,r26 + std Y+26,r18 + std Y+27,r20 + std Y+28,r17 + std Y+29,r16 + std Y+30,r27 + std Y+31,r21 + std Y+32,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + ldd r20,Y+37 + ldd r21,Y+38 + ldd r16,Y+39 + ldd r17,Y+40 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r16 +#if defined(RAMPZ) + elpm r16,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r16,Z +#elif defined(__AVR_TINY__) + ld r16,Z +#else + lpm + mov r16,r0 +#endif + mov r30,r17 +#if defined(RAMPZ) + elpm r17,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r17,Z +#elif defined(__AVR_TINY__) + ld r17,Z +#else + lpm + mov r17,r0 +#endif + ldd r0,Y+41 + std Y+33,r0 + ldd r0,Y+42 + std Y+34,r0 + ldd r0,Y+43 + std Y+35,r0 + ldd r0,Y+44 + std Y+36,r0 + ldd r0,Y+45 + std Y+37,r0 + ldd r0,Y+46 + std Y+38,r0 + ldd r0,Y+47 + std Y+39,r0 + ldd r0,Y+48 + std Y+40,r0 + std Y+41,r26 + std Y+42,r18 + std Y+43,r20 + std Y+44,r17 + std Y+45,r16 + std Y+46,r27 + std Y+47,r21 + std Y+48,r19 + movw r18,r2 + movw r26,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r18 + movw r24,r26 + eor r14,r10 + eor 
r15,r11 + eor r24,r12 + eor r25,r13 + eor r10,r2 + eor r11,r3 + eor r12,r4 + eor r13,r5 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + mov r0,r6 + mov r6,r7 + mov r7,r8 + mov r8,r9 + mov r9,r0 + mov r0,r10 + mov r10,r12 + mov r12,r0 + mov r0,r11 + mov r11,r13 + mov r13,r0 + mov r0,r25 + mov r25,r24 + mov r24,r15 + mov r15,r14 + mov r14,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + eor r2,r18 + eor r3,r19 + eor r4,r26 + eor r5,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + eor r6,r18 + eor r7,r19 + eor r8,r26 + eor r9,r27 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r6,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r2,r18 + ldi r18,2 + eor r10,r18 + eor r4,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r18,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r30,r14 +#if defined(RAMPZ) + elpm r14,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r14,Z +#elif defined(__AVR_TINY__) + ld r14,Z +#else + lpm + mov r14,r0 +#endif + mov r30,r15 +#if defined(RAMPZ) + elpm r15,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r15,Z +#elif defined(__AVR_TINY__) + ld r15,Z +#else + lpm + mov r15,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+49 + cp r22,r18 + breq 5348f + rjmp 118b +5348: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+48,r2 + std Z+49,r3 + std Z+50,r4 + std Z+51,r5 + std Z+52,r6 + std Z+53,r7 + std Z+54,r8 + std Z+55,r9 + std Z+56,r10 + std Z+57,r11 + std Z+58,r12 + std Z+59,r13 + std Z+60,r14 + std Z+61,r15 + std Z+62,r24 + std Z+63,r25 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r26,Y+3 + ldd r27,Y+4 + st Z,r18 + std Z+1,r19 + std Z+2,r26 + std Z+3,r27 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r26,Y+7 + ldd r27,Y+8 + std Z+4,r18 + std Z+5,r19 + std Z+6,r26 + std Z+7,r27 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r26,Y+11 + ldd r27,Y+12 + std Z+8,r18 + std Z+9,r19 + std Z+10,r26 + std Z+11,r27 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r26,Y+15 + ldd r27,Y+16 + std Z+12,r18 + std Z+13,r19 + std Z+14,r26 + std Z+15,r27 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + std Z+16,r18 + std Z+17,r19 + std Z+18,r26 + std Z+19,r27 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r26,Y+23 + ldd r27,Y+24 + std Z+20,r18 + std Z+21,r19 + std Z+22,r26 + std Z+23,r27 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r26,Y+27 + ldd r27,Y+28 + std Z+24,r18 + std Z+25,r19 + std Z+26,r26 + std Z+27,r27 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r26,Y+31 + ldd r27,Y+32 + std Z+28,r18 + std Z+29,r19 + std Z+30,r26 + std Z+31,r27 + ldd r18,Y+33 + ldd r19,Y+34 + ldd r26,Y+35 + ldd r27,Y+36 + std Z+32,r18 + std Z+33,r19 + std Z+34,r26 + std Z+35,r27 + ldd r18,Y+37 + ldd r19,Y+38 + ldd r26,Y+39 + ldd r27,Y+40 + std Z+36,r18 + std Z+37,r19 + std Z+38,r26 + std Z+39,r27 + ldd r18,Y+41 + ldd r19,Y+42 + ldd r26,Y+43 + ldd r27,Y+44 + std Z+40,r18 + std Z+41,r19 + std Z+42,r26 + std Z+43,r27 + ldd r18,Y+45 + ldd r19,Y+46 + ldd r26,Y+47 + ldd r27,Y+48 + std Z+44,r18 + std Z+45,r19 + std Z+46,r26 + std Z+47,r27 + adiw r28,49 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size forkskinny_128_384_inv_rounds, .-forkskinny_128_384_inv_rounds + + .text +.global forkskinny_128_384_forward_tk + .type forkskinny_128_384_forward_tk, @function +forkskinny_128_384_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + 
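+ ; ForkSkinny-128-384 forward tweakey schedule: TK1 again lives in
+ ; registers, with TK2 kept at Y+1..Y+16 and TK3 at Y+17..Y+32 on the
+ ; stack.  Each iteration applies the cell permutation
+ ;     PT = [9,15,8,13,10,14,12,11, 0,1,2,3,4,5,6,7]
+ ; to all three tweakey arrays, then steps the incoming TK2 rows
+ ; through table_2 and the TK3 rows through table_3 (the forward LFSR
+ ; byte maps for TK2/TK3, by all appearances).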
ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + ldd r12,Z+8 + ldd r13,Z+9 + ldd r14,Z+10 + ldd r15,Z+11 + ldd r24,Z+12 + ldd r25,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r23,hh8(table_2) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r12 + movw r20,r14 + movw r26,r24 + movw r2,r16 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + mov r4,r19 + mov r5,r3 + mov r6,r18 + mov r7,r27 + mov r8,r20 + mov r9,r2 + mov r10,r26 + mov r11,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r26,Y+13 + ldd r27,Y+14 + ldd r2,Y+15 + ldd r3,Y+16 + ldd r23,Y+1 + std Y+9,r23 + ldd r23,Y+2 + std Y+10,r23 + ldd r23,Y+3 + std Y+11,r23 + ldd r23,Y+4 + std Y+12,r23 + ldd r23,Y+5 + std Y+13,r23 + ldd r23,Y+6 + std Y+14,r23 + ldd r23,Y+7 + std Y+15,r23 + ldd r23,Y+8 + std Y+16,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+1,r19 + std Y+2,r3 + std Y+3,r18 + std Y+4,r27 + std Y+5,r20 + std Y+6,r2 + std Y+7,r26 + std Y+8,r21 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r26,Y+29 + ldd r27,Y+30 + ldd r2,Y+31 + ldd 
r3,Y+32 + ldd r23,Y+17 + std Y+25,r23 + ldd r23,Y+18 + std Y+26,r23 + ldd r23,Y+19 + std Y+27,r23 + ldd r23,Y+20 + std Y+28,r23 + ldd r23,Y+21 + std Y+29,r23 + ldd r23,Y+22 + std Y+30,r23 + ldd r23,Y+23 + std Y+31,r23 + ldd r23,Y+24 + std Y+32,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + std Y+17,r19 + std Y+18,r3 + std Y+19,r18 + std Y+20,r27 + std Y+21,r20 + std Y+22,r2 + std Y+23,r26 + std Y+24,r21 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r4 + std Z+1,r5 + std Z+2,r6 + std Z+3,r7 + std Z+4,r8 + std Z+5,r9 + std Z+6,r10 + std Z+7,r11 + std Z+8,r12 + std Z+9,r13 + std Z+10,r14 + std Z+11,r15 + std Z+12,r24 + std Z+13,r25 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_forward_tk, .-forkskinny_128_384_forward_tk + + .text +.global forkskinny_128_384_reverse_tk + .type forkskinny_128_384_reverse_tk, @function 
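+ ; Reverse tweakey schedule for ForkSkinny-128-384: each iteration
+ ; undoes one forward round by applying the inverse of PT.  SKINNY-128's
+ ; two tweakey LFSRs are mutual inverses, so the same lookup tables are
+ ; reused with their roles swapped: table_3 steps TK2 backwards while
+ ; table_2 steps TK3 backwards.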
+forkskinny_128_384_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r16,Z+14 + ldd r17,Z+15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + std Y+1,r18 + std Y+2,r19 + std Y+3,r20 + std Y+4,r21 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + std Y+5,r18 + std Y+6,r19 + std Y+7,r20 + std Y+8,r21 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + std Y+9,r18 + std Y+10,r19 + std Y+11,r20 + std Y+12,r21 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + std Y+13,r18 + std Y+14,r19 + std Y+15,r20 + std Y+16,r21 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + std Y+17,r18 + std Y+18,r19 + std Y+19,r20 + std Y+20,r21 + ldd r18,Z+36 + ldd r19,Z+37 + ldd r20,Z+38 + ldd r21,Z+39 + std Y+21,r18 + std Y+22,r19 + std Y+23,r20 + std Y+24,r21 + ldd r18,Z+40 + ldd r19,Z+41 + ldd r20,Z+42 + ldd r21,Z+43 + std Y+25,r18 + std Y+26,r19 + std Y+27,r20 + std Y+28,r21 + ldd r18,Z+44 + ldd r19,Z+45 + ldd r20,Z+46 + ldd r21,Z+47 + std Y+29,r18 + std Y+30,r19 + std Y+31,r20 + std Y+32,r21 + push r31 + push r30 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r23,hh8(table_3) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +83: + movw r18,r2 + movw r20,r4 + movw r26,r6 + movw r24,r8 + movw r2,r10 + movw r4,r12 + movw r6,r14 + movw r8,r16 + mov r10,r20 + mov r11,r18 + mov r12,r26 + mov r13,r25 + mov r14,r24 + mov r15,r21 + mov r16,r27 + mov r17,r19 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r24,Y+7 + ldd r25,Y+8 + ldd r23,Y+9 + std Y+1,r23 + ldd r23,Y+10 + std Y+2,r23 + ldd r23,Y+11 + std Y+3,r23 + ldd r23,Y+12 + std Y+4,r23 + ldd r23,Y+13 + std Y+5,r23 + ldd r23,Y+14 + std Y+6,r23 + ldd r23,Y+15 + std Y+7,r23 + ldd r23,Y+16 + std Y+8,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z 
+#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+9,r20 + std Y+10,r18 + std Y+11,r26 + std Y+12,r25 + std Y+13,r24 + std Y+14,r21 + std Y+15,r27 + std Y+16,r19 + ldi r30,lo8(table_2) + ldi r31,hi8(table_2) +#if defined(RAMPZ) + ldi r18,hh8(table_2) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r26,Y+21 + ldd r27,Y+22 + ldd r24,Y+23 + ldd r25,Y+24 + ldd r23,Y+25 + std Y+17,r23 + ldd r23,Y+26 + std Y+18,r23 + ldd r23,Y+27 + std Y+19,r23 + ldd r23,Y+28 + std Y+20,r23 + ldd r23,Y+29 + std Y+21,r23 + ldd r23,Y+30 + std Y+22,r23 + ldd r23,Y+31 + std Y+23,r23 + ldd r23,Y+32 + std Y+24,r23 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r20 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + mov r30,r21 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r24 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + mov r30,r25 +#if defined(RAMPZ) + elpm r25,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r25,Z +#elif defined(__AVR_TINY__) + ld r25,Z +#else + lpm + mov r25,r0 +#endif + std Y+25,r20 + std Y+26,r18 + std Y+27,r26 + std Y+28,r25 + std Y+29,r24 + std Y+30,r21 + std Y+31,r27 + std Y+32,r19 + ldi r30,lo8(table_3) + ldi r31,hi8(table_3) +#if defined(RAMPZ) + ldi r18,hh8(table_3) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + dec r22 + breq 5183f + rjmp 83b +5183: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r14 + std Z+13,r15 + std Z+14,r16 + std Z+15,r17 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r20,Y+3 + ldd r21,Y+4 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r20,Y+7 + ldd r21,Y+8 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r20,Y+11 + ldd r21,Y+12 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r20,Y+15 + ldd r21,Y+16 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ldd r18,Y+17 + ldd r19,Y+18 + ldd r20,Y+19 + ldd r21,Y+20 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + ldd r18,Y+21 + ldd r19,Y+22 + ldd r20,Y+23 + ldd r21,Y+24 + std Z+36,r18 + std Z+37,r19 + std Z+38,r20 + std Z+39,r21 + ldd r18,Y+25 + ldd r19,Y+26 + ldd r20,Y+27 + ldd r21,Y+28 + std Z+40,r18 + std Z+41,r19 + std Z+42,r20 + std Z+43,r21 + ldd r18,Y+29 + ldd r19,Y+30 + ldd r20,Y+31 + ldd r21,Y+32 + std Z+44,r18 + std Z+45,r19 + std Z+46,r20 + std 
Z+47,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_128_384_reverse_tk, .-forkskinny_128_384_reverse_tk + + .text +.global forkskinny_64_192_rounds + .type forkskinny_64_192_rounds, @function +forkskinny_64_192_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r22 +#if 
defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + inc r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + mov r0,r1 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + lsr r3 + ror r2 + ror r0 + or r3,r0 + mov r0,r4 + mov r4,r5 + mov r5,r0 + mov r0,r6 + mov r6,r7 + mov r7,r0 + mov r0,r1 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + lsr r7 + ror r6 + ror r0 + or r7,r0 + eor r2,r4 + eor r3,r5 + eor r4,r26 + eor r5,r27 + movw r18,r6 + eor r18,r4 + eor r19,r5 + movw r6,r4 + movw r4,r2 + movw r2,r26 + movw r26,r18 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+5,r18 + std Y+6,r19 + std Y+7,r8 + std Y+8,r9 + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + ldi r25,240 + and r9,r25 + swap r12 + ldi r24,15 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+1,r18 + std Y+2,r19 + std Y+3,r8 + std Y+4,r9 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+13,r18 + std Y+14,r19 + std Y+15,r8 + std Y+16,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+9,r18 + std Y+10,r19 + std Y+11,r8 + std Y+12,r9 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+21,r18 + std Y+22,r19 + std Y+23,r8 + std Y+24,r9 + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + mov r19,r11 + swap r19 + andi r19,240 + mov r21,r12 + andi r21,15 + or r19,r21 + mov r18,r11 + andi r18,240 + mov r21,r13 + andi r21,15 + or r18,r21 + mov r9,r10 + and r9,r25 + swap r12 + and r12,r24 + or r9,r12 + mov r8,r13 + and r8,r25 + and r10,r24 + or r8,r10 + std Y+17,r18 + std Y+18,r19 + std Y+19,r8 + std Y+20,r9 + ldi r30,lo8(table_5) + ldi r31,hi8(table_5) +#if defined(RAMPZ) + ldi r18,hh8(table_5) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + inc r22 + cp r22,r20 + breq 5273f + rjmp 61b +5273: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_rounds, .-forkskinny_64_192_rounds + + .text +.global forkskinny_64_192_inv_rounds + .type forkskinny_64_192_inv_rounds, @function +forkskinny_64_192_inv_rounds: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 38 + ldd r26,Z+24 + ldd r27,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + ld r18,Z + ldd r19,Z+1 + std Y+1,r18 + std Y+2,r19 + ldd r18,Z+2 + ldd r19,Z+3 + std Y+3,r18 + std Y+4,r19 + ldd r18,Z+4 + ldd r19,Z+5 + std Y+5,r18 + std Y+6,r19 + ldd r18,Z+6 + ldd r19,Z+7 + std Y+7,r18 + std Y+8,r19 + ldd r18,Z+8 + ldd r19,Z+9 + std Y+9,r18 + std Y+10,r19 + ldd r18,Z+10 + ldd r19,Z+11 + std Y+11,r18 + std Y+12,r19 + ldd r18,Z+12 + ldd r19,Z+13 + std Y+13,r18 + std Y+14,r19 + ldd r18,Z+14 + ldd r19,Z+15 + std Y+15,r18 + std Y+16,r19 + ldd r18,Z+16 + ldd r19,Z+17 + std Y+17,r18 + std Y+18,r19 + ldd r18,Z+18 + ldd r19,Z+19 + std Y+19,r18 + std Y+20,r19 + ldd r18,Z+20 + ldd r19,Z+21 + std Y+21,r18 + std Y+22,r19 + ldd r18,Z+22 + ldd r19,Z+23 + std Y+23,r18 + std Y+24,r19 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + lsl r22 + lsl r20 +61: + ldd r18,Y+1 + ldd r19,Y+2 + ldd r8,Y+3 + ldd r9,Y+4 + ldd r10,Y+5 + ldd r11,Y+6 + ldd r12,Y+7 + ldd r13,Y+8 + std Y+1,r10 + std Y+2,r11 + std Y+3,r12 + std Y+4,r13 + mov r11,r18 + ldi r25,240 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or 
r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+5,r10 + std Y+6,r11 + std Y+7,r12 + std Y+8,r13 + ldd r18,Y+9 + ldd r19,Y+10 + ldd r8,Y+11 + ldd r9,Y+12 + ldd r10,Y+13 + ldd r11,Y+14 + ldd r12,Y+15 + ldd r13,Y+16 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+13,r10 + std Y+14,r11 + std Y+15,r12 + std Y+16,r13 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r18,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + ldd r18,Y+17 + ldd r19,Y+18 + ldd r8,Y+19 + ldd r9,Y+20 + ldd r10,Y+21 + ldd r11,Y+22 + ldd r12,Y+23 + ldd r13,Y+24 + std Y+17,r10 + std Y+18,r11 + std Y+19,r12 + std Y+20,r13 + mov r30,r18 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r19,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r19,Z +#elif defined(__AVR_TINY__) + ld r19,Z +#else + lpm + mov r19,r0 +#endif + mov r30,r8 +#if defined(RAMPZ) + elpm r8,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r8,Z +#elif defined(__AVR_TINY__) + ld r8,Z +#else + lpm + mov r8,r0 +#endif + mov r30,r9 +#if defined(RAMPZ) + elpm r9,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r9,Z +#elif defined(__AVR_TINY__) + ld r9,Z +#else + lpm + mov r9,r0 +#endif + mov r11,r18 + and r11,r25 + mov r21,r19 + swap r21 + andi r21,15 + or r11,r21 + mov r10,r9 + and r10,r25 + mov r21,r8 + andi r21,15 + or r10,r21 + mov r13,r8 + and r13,r25 + andi r18,15 + or r13,r18 + mov r12,r9 + swap r12 + and r12,r25 + andi r19,15 + or r12,r19 + std Y+21,r10 + std Y+22,r11 + std Y+23,r12 + std Y+24,r13 + movw r18,r26 + movw r26,r2 + movw r2,r4 + movw r4,r6 + movw r6,r18 + eor r6,r4 + eor r7,r5 + eor r4,r26 + eor r5,r27 + eor r2,r4 + eor r3,r5 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + ldd r18,Y+1 + ldd r19,Y+2 + eor r26,r18 + eor r27,r19 + ldd r18,Y+3 + ldd r19,Y+4 + eor r2,r18 + eor r3,r19 + ldd r18,Y+9 + ldd r19,Y+10 + eor r26,r18 + eor r27,r19 + ldd r18,Y+11 + ldd r19,Y+12 + eor r2,r18 + eor r3,r19 + ldd r18,Y+17 + ldd r19,Y+18 + eor r26,r18 + eor r27,r19 + ldd r18,Y+19 + ldd r19,Y+20 + eor r2,r18 + eor r3,r19 + ldi r30,lo8(table_4) + ldi r31,hi8(table_4) +#if defined(RAMPZ) + ldi r18,hh8(table_4) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + 
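/* table_4 appears to hold the per-round constants as nibbles; r22 (the round index, doubled at loop entry by the lsl r22 above) is pre-decremented before each of the two fetches below, matching rc = RC[--first] in the C model */ +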
dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r3,r18 + dec r22 + mov r30,r22 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + swap r18 + eor r27,r18 + ldi r18,32 + eor r5,r18 + eor r26,r18 + ldi r30,lo8(table_6) + ldi r31,hi8(table_6) +#if defined(RAMPZ) + ldi r18,hh8(table_6) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + mov r30,r26 +#if defined(RAMPZ) + elpm r26,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r26,Z +#elif defined(__AVR_TINY__) + ld r26,Z +#else + lpm + mov r26,r0 +#endif + mov r30,r27 +#if defined(RAMPZ) + elpm r27,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r27,Z +#elif defined(__AVR_TINY__) + ld r27,Z +#else + lpm + mov r27,r0 +#endif + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + mov r30,r6 +#if defined(RAMPZ) + elpm r6,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r6,Z +#elif defined(__AVR_TINY__) + ld r6,Z +#else + lpm + mov r6,r0 +#endif + mov r30,r7 +#if defined(RAMPZ) + elpm r7,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r7,Z +#elif defined(__AVR_TINY__) + ld r7,Z +#else + lpm + mov r7,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r18,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r18 +#endif + cp r22,r20 + breq 5268f + rjmp 61b +5268: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + std Z+24,r26 + std Z+25,r27 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Y+1 + ldd r19,Y+2 + st Z,r18 + std Z+1,r19 + ldd r18,Y+3 + ldd r19,Y+4 + std Z+2,r18 + std Z+3,r19 + ldd r18,Y+5 + ldd r19,Y+6 + std Z+4,r18 + std Z+5,r19 + ldd r18,Y+7 + ldd r19,Y+8 + std Z+6,r18 + std Z+7,r19 + ldd r18,Y+9 + ldd r19,Y+10 + std Z+8,r18 + std Z+9,r19 + ldd r18,Y+11 + ldd r19,Y+12 + std Z+10,r18 + std Z+11,r19 + ldd r18,Y+13 + ldd r19,Y+14 + std Z+12,r18 + std Z+13,r19 + ldd r18,Y+15 + ldd r19,Y+16 + std Z+14,r18 + std Z+15,r19 + ldd r18,Y+17 + ldd r19,Y+18 + std Z+16,r18 + std Z+17,r19 + ldd r18,Y+19 + ldd r19,Y+20 + std Z+18,r18 + std Z+19,r19 + ldd r18,Y+21 + ldd r19,Y+22 + std Z+20,r18 + std Z+21,r19 + ldd r18,Y+23 + ldd r19,Y+24 + std Z+22,r18 + std Z+23,r19 + adiw r28,24 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_inv_rounds, .-forkskinny_64_192_inv_rounds + + .text +.global forkskinny_64_192_forward_tk + .type forkskinny_64_192_forward_tk, @function +forkskinny_64_192_forward_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw 
r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + push r19 + push r18 + push r21 + push r20 + mov r19,r27 + swap r19 + andi r19,240 + mov r23,r28 + andi r23,15 + or r19,r23 + mov r18,r27 + andi r18,240 + mov r23,r29 + andi r23,15 + or r18,r23 + mov r21,r26 + andi r21,240 + swap r28 + andi r28,15 + or r21,r28 + mov r20,r29 + andi r20,240 + andi r26,15 + or r20,r26 + pop r28 + pop r29 + pop r26 + pop r27 + push r3 + push r2 + push r5 + push r4 + mov r3,r7 + swap r3 + ldi r17,240 + and r3,r17 + mov r23,r8 + andi r23,15 + or r3,r23 + mov r2,r7 + and r2,r17 + mov r23,r9 + andi r23,15 + or r2,r23 + mov r5,r6 + and r5,r17 + swap r8 + ldi r16,15 + and r8,r16 + or r5,r8 + mov r4,r9 + and r4,r17 + and r6,r16 + or r4,r6 + pop r8 + pop r9 + pop r6 + pop r7 + push r11 + push r10 + push r13 + push r12 + mov r11,r15 + swap r11 + and r11,r17 + mov r23,r24 + andi r23,15 + or r11,r23 + mov r10,r15 + and r10,r17 + mov r23,r25 + andi r23,15 + or r10,r23 + mov r13,r14 + and r13,r17 + swap r24 + andi r24,15 + or r13,r24 + mov r12,r25 + and r12,r17 + and r14,r16 + or r12,r14 + pop r24 + pop r25 + pop r14 + pop r15 + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + 
std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_forward_tk, .-forkskinny_64_192_forward_tk + + .text +.global forkskinny_64_192_reverse_tk + .type forkskinny_64_192_reverse_tk, @function +forkskinny_64_192_reverse_tk: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r28,Z+6 + ldd r29,Z+7 + ldd r2,Z+8 + ldd r3,Z+9 + ldd r4,Z+10 + ldd r5,Z+11 + ldd r6,Z+12 + ldd r7,Z+13 + ldd r8,Z+14 + ldd r9,Z+15 + ldd r10,Z+16 + ldd r11,Z+17 + ldd r12,Z+18 + ldd r13,Z+19 + ldd r14,Z+20 + ldd r15,Z+21 + ldd r24,Z+22 + ldd r25,Z+23 + push r31 + push r30 + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif +27: + mov r30,r2 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + mov r30,r3 +#if defined(RAMPZ) + elpm r3,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r3,Z +#elif defined(__AVR_TINY__) + ld r3,Z +#else + lpm + mov r3,r0 +#endif + mov r30,r4 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + mov r30,r5 +#if defined(RAMPZ) + elpm r5,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r5,Z +#elif defined(__AVR_TINY__) + ld r5,Z +#else + lpm + mov r5,r0 +#endif + ldi r30,lo8(table_7) + ldi r31,hi8(table_7) +#if defined(RAMPZ) + ldi r23,hh8(table_7) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r30,r10 +#if defined(RAMPZ) + elpm r10,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r10,Z +#elif defined(__AVR_TINY__) + ld r10,Z +#else + lpm + mov r10,r0 +#endif + mov r30,r11 +#if defined(RAMPZ) + elpm r11,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r11,Z +#elif defined(__AVR_TINY__) + ld r11,Z +#else + lpm + mov r11,r0 +#endif + mov r30,r12 +#if defined(RAMPZ) + elpm r12,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r12,Z +#elif defined(__AVR_TINY__) + ld r12,Z +#else + lpm + mov r12,r0 +#endif + mov r30,r13 +#if defined(RAMPZ) + elpm r13,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r13,Z +#elif defined(__AVR_TINY__) + ld r13,Z +#else + lpm + mov r13,r0 +#endif + ldi r30,lo8(table_8) + ldi r31,hi8(table_8) +#if defined(RAMPZ) + ldi r23,hh8(table_8) + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + push r27 + push r26 + push r29 + push r28 + mov r27,r18 + andi r27,240 + mov r23,r19 + swap r23 + andi r23,15 + or r27,r23 + mov r26,r21 + andi r26,240 + mov r23,r20 + andi r23,15 + or r26,r23 + mov r29,r20 + andi r29,240 + andi r18,15 + or r29,r18 + mov r28,r21 + swap r28 + andi r28,240 + andi r19,15 + or r28,r19 + pop r20 + pop r21 + pop r18 + pop r19 + push r7 + push r6 + push r9 + push r8 + mov r7,r2 + ldi r17,240 + and r7,r17 + mov r23,r3 + swap r23 + andi r23,15 + or r7,r23 + mov r6,r5 + and r6,r17 + mov r23,r4 + andi r23,15 + or r6,r23 + mov r9,r4 + and r9,r17 + ldi r16,15 + and r2,r16 + or r9,r2 + mov r8,r5 + swap r8 + and r8,r17 + and r3,r16 + or r8,r3 + pop r4 + pop r5 + pop r2 + pop r3 + push r15 + push r14 + push r25 + push 
r24 + mov r15,r10 + and r15,r17 + mov r23,r11 + swap r23 + andi r23,15 + or r15,r23 + mov r14,r13 + and r14,r17 + mov r23,r12 + andi r23,15 + or r14,r23 + mov r25,r12 + andi r25,240 + and r10,r16 + or r25,r10 + mov r24,r13 + swap r24 + andi r24,240 + and r11,r16 + or r24,r11 + pop r12 + pop r13 + pop r10 + pop r11 + dec r22 + breq 5125f + rjmp 27b +5125: +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r26 + std Z+5,r27 + std Z+6,r28 + std Z+7,r29 + std Z+8,r2 + std Z+9,r3 + std Z+10,r4 + std Z+11,r5 + std Z+12,r6 + std Z+13,r7 + std Z+14,r8 + std Z+15,r9 + std Z+16,r10 + std Z+17,r11 + std Z+18,r12 + std Z+19,r13 + std Z+20,r14 + std Z+21,r15 + std Z+22,r24 + std Z+23,r25 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size forkskinny_64_192_reverse_tk, .-forkskinny_64_192_reverse_tk + +#endif diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.c b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.c index b050ff1..6e2ac55 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.c +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.c @@ -40,35 +40,10 @@ static unsigned char const RC[87] = { 0x4a, 0x14, 0x29, 0x52, 0x24, 0x48, 0x10 }; -/** - * \brief Number of rounds of ForkSkinny-128-256 before forking. - */ -#define FORKSKINNY_128_256_ROUNDS_BEFORE 21 - -/** - * \brief Number of rounds of ForkSkinny-128-256 after forking. - */ -#define FORKSKINNY_128_256_ROUNDS_AFTER 27 - -/** - * \brief State information for ForkSkinny-128-256. - */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t S[4]; /**< Current block state */ +#if !defined(__AVR__) -} forkskinny_128_256_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-256. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -79,137 +54,52 @@ static void forkskinny_128_256_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1 and TK2 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); -} - -void forkskinny_128_256_encrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_256_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_BEFORE; ++round) { - forkskinny_128_256_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = 
(FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } } -/** - * \brief Applies one round of ForkSkinny-128-256 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_256_inv_round - (forkskinny_128_256_state_t *state, unsigned round) +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -220,39 +110,42 @@ static void forkskinny_128_256_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1 and TK2 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -261,115 +154,64 @@ static void forkskinny_128_256_inv_round state->S[3] = s3; } -void forkskinny_128_256_decrypt - (const unsigned char key[32], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds) { - forkskinny_128_256_state_t state; - forkskinny_128_256_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
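(The tweakey cell permutation has order 16, so 16 rounds bring every word back to its starting cell and only the LFSR updates need to be applied.)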
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); --round) { - forkskinny_128_256_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_256_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_256_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_256_ROUNDS_BEFORE; - round < (FORKSKINNY_128_256_ROUNDS_BEFORE + - FORKSKINNY_128_256_ROUNDS_AFTER); ++round) { - forkskinny_128_256_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-128-384 before forking. - */ -#define FORKSKINNY_128_384_ROUNDS_BEFORE 25 - -/** - * \brief Number of rounds of ForkSkinny-128-384 after forking. - */ -#define FORKSKINNY_128_384_ROUNDS_AFTER 31 - -/** - * \brief State information for ForkSkinny-128-384. 
- */ -typedef struct -{ - uint32_t TK1[4]; /**< First part of the tweakey */ - uint32_t TK2[4]; /**< Second part of the tweakey */ - uint32_t TK3[4]; /**< Third part of the tweakey */ - uint32_t S[4]; /**< Current block state */ - -} forkskinny_128_384_state_t; - -/** - * \brief Applies one round of ForkSkinny-128-384. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -380,145 +222,56 @@ static void forkskinny_128_384_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny128_sbox(s0); + skinny128_sbox(s1); + skinny128_sbox(s2); + skinny128_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Shift the cells in the rows right, which moves the cell + * values up closer to the MSB. 
That is, we do a left rotate + * on the word to rotate the cells in the word right */ + s1 = leftRotate8(s1); + s2 = leftRotate16(s2); + s3 = leftRotate24(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(state->TK1); - skinny128_permute_tk(state->TK2); - skinny128_permute_tk(state->TK3); - skinny128_LFSR2(state->TK2[0]); - skinny128_LFSR2(state->TK2[1]); - skinny128_LFSR3(state->TK3[0]); - skinny128_LFSR3(state->TK3[1]); } -void forkskinny_128_384_encrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_128_384_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_BEFORE; ++round) { - forkskinny_128_384_round(&state, round); - } - - /* Determine which output blocks we need */ - if (output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint32_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x08040201U; /* Branching constant */ - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - } else { - /* We 
only need the right output block */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&state, round); - } - le_store_word32(output_right, state.S[0]); - le_store_word32(output_right + 4, state.S[1]); - le_store_word32(output_right + 8, state.S[2]); - le_store_word32(output_right + 12, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-128-384 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - */ -static void forkskinny_128_384_inv_round - (forkskinny_128_384_state_t *state, unsigned round) +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last) { uint32_t s0, s1, s2, s3, temp; uint8_t rc; @@ -529,43 +282,46 @@ static void forkskinny_128_384_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1 and TK2 for the next round */ - skinny128_inv_LFSR2(state->TK2[0]); - skinny128_inv_LFSR2(state->TK2[1]); - skinny128_inv_LFSR3(state->TK3[0]); - skinny128_inv_LFSR3(state->TK3[1]); - skinny128_inv_permute_tk(state->TK1); - skinny128_inv_permute_tk(state->TK2); - skinny128_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left, which moves the cell - * values down closer to the LSB. That is, we do a right - * rotate on the word to rotate the cells in the word left */ - s1 = rightRotate8(s1); - s2 = rightRotate16(s2); - s3 = rightRotate24(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - (rc & 0x0F) ^ 0x00020000; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left, which moves the cell + * values down closer to the LSB. 
That is, we do a right + * rotate on the word to rotate the cells in the word left */ + s1 = rightRotate8(s1); + s2 = rightRotate16(s2); + s3 = rightRotate24(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + (rc & 0x0F) ^ 0x00020000; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ (rc >> 4); + s2 ^= 0x02; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny128_inv_sbox(s0); + skinny128_inv_sbox(s1); + skinny128_inv_sbox(s2); + skinny128_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -574,128 +330,78 @@ static void forkskinny_128_384_inv_round state->S[3] = s3; } -void forkskinny_128_384_decrypt - (const unsigned char key[48], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds) { - forkskinny_128_384_state_t state; - forkskinny_128_384_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = le_load_word32(key); - state.TK1[1] = le_load_word32(key + 4); - state.TK1[2] = le_load_word32(key + 8); - state.TK1[3] = le_load_word32(key + 12); - state.TK2[0] = le_load_word32(key + 16); - state.TK2[1] = le_load_word32(key + 20); - state.TK2[2] = le_load_word32(key + 24); - state.TK2[3] = le_load_word32(key + 28); - state.TK3[0] = le_load_word32(key + 32); - state.TK3[1] = le_load_word32(key + 36); - state.TK3[2] = le_load_word32(key + 40); - state.TK3[3] = le_load_word32(key + 44); - state.S[0] = le_load_word32(input); - state.S[1] = le_load_word32(input + 4); - state.S[2] = le_load_word32(input + 8); - state.S[3] = le_load_word32(input + 12); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); ++round) { - skinny128_permute_tk(state.TK1); - skinny128_permute_tk(state.TK2); - skinny128_permute_tk(state.TK3); - skinny128_LFSR2(state.TK2[0]); - skinny128_LFSR2(state.TK2[1]); - skinny128_LFSR3(state.TK3[0]); - skinny128_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR2(state->TK2[2]); + skinny128_LFSR2(state->TK2[3]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + skinny128_LFSR3(state->TK3[2]); + skinny128_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER * 2); - round > (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); --round) { - forkskinny_128_384_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_permute_tk(state->TK1); + skinny128_permute_tk(state->TK2); + skinny128_permute_tk(state->TK3); + skinny128_LFSR2(state->TK2[0]); + skinny128_LFSR2(state->TK2[1]); + skinny128_LFSR3(state->TK3[0]); + skinny128_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x08040201U; - state.S[1] ^= 0x82412010U; - state.S[2] ^= 0x28140a05U; - state.S[3] ^= 0x8844a251U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_128_384_ROUNDS_AFTER; ++round) { - skinny128_inv_LFSR2(state.TK2[0]); - skinny128_inv_LFSR2(state.TK2[1]); - skinny128_inv_LFSR3(state.TK3[0]); - skinny128_inv_LFSR3(state.TK3[1]); - skinny128_inv_permute_tk(state.TK1); - skinny128_inv_permute_tk(state.TK2); - skinny128_inv_permute_tk(state.TK3); +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny128_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR2(state->TK2[2]); + skinny128_inv_LFSR2(state->TK2[3]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_LFSR3(state->TK3[2]); + skinny128_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_128_384_inv_round(&state, round - 1); - } - le_store_word32(output_left, state.S[0]); - le_store_word32(output_left + 4, state.S[1]); - le_store_word32(output_left + 8, state.S[2]); - le_store_word32(output_left + 12, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_128_384_ROUNDS_BEFORE; - round < (FORKSKINNY_128_384_ROUNDS_BEFORE + - FORKSKINNY_128_384_ROUNDS_AFTER); ++round) { - forkskinny_128_384_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny128_inv_LFSR2(state->TK2[0]); + skinny128_inv_LFSR2(state->TK2[1]); + skinny128_inv_LFSR3(state->TK3[0]); + skinny128_inv_LFSR3(state->TK3[1]); + skinny128_inv_permute_tk(state->TK1); + skinny128_inv_permute_tk(state->TK2); + skinny128_inv_permute_tk(state->TK3); + --rounds; } - le_store_word32(output_right, fstate.S[0]); - le_store_word32(output_right + 4, fstate.S[1]); - le_store_word32(output_right + 8, fstate.S[2]); - le_store_word32(output_right + 12, fstate.S[3]); } -/** - * \brief Number of rounds of ForkSkinny-64-192 before forking. - */ -#define FORKSKINNY_64_192_ROUNDS_BEFORE 17 - -/** - * \brief Number of rounds of ForkSkinny-64-192 after forking. - */ -#define FORKSKINNY_64_192_ROUNDS_AFTER 23 - -/** - * \brief State information for ForkSkinny-64-192. - */ -typedef struct -{ - uint16_t TK1[4]; /**< First part of the tweakey */ - uint16_t TK2[4]; /**< Second part of the tweakey */ - uint16_t TK3[4]; /**< Third part of the tweakey */ - uint16_t S[4]; /**< Current block state */ - -} forkskinny_64_192_state_t; - -/** - * \brief Applies one round of ForkSkinny-64-192. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. - * - * Note: The cells of each row are order in big-endian nibble order - * so it is easiest to manage the rows in bit-endian byte order. 
- */ -static void forkskinny_64_192_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -706,144 +412,55 @@ static void forkskinny_64_192_round s2 = state->S[2]; s3 = state->S[3]; - /* Apply the S-box to all cells in the state */ - skinny64_sbox(s0); - skinny64_sbox(s1); - skinny64_sbox(s2); - skinny64_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Shift the cells in the rows right */ - s1 = rightRotate4_16(s1); - s2 = rightRotate8_16(s2); - s3 = rightRotate12_16(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; + /* Perform all requested rounds */ + for (; first < last; ++first) { + /* Apply the S-box to all cells in the state */ + skinny64_sbox(s0); + skinny64_sbox(s1); + skinny64_sbox(s2); + skinny64_sbox(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Shift the cells in the rows right */ + s1 = rightRotate4_16(s1); + s2 = rightRotate8_16(s2); + s3 = rightRotate12_16(s3); + + /* Mix the columns */ + s1 ^= s2; + s2 ^= s0; + temp = s3 ^ s2; + s3 = s2; + s2 = s1; + s1 = s0; + s0 = temp; + + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + } /* Save the local variables back to the state */ state->S[0] = s0; state->S[1] = s1; state->S[2] = s2; state->S[3] = s3; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_permute_tk(state->TK1); - skinny64_permute_tk(state->TK2); - skinny64_permute_tk(state->TK3); - skinny64_LFSR2(state->TK2[0]); - skinny64_LFSR2(state->TK2[1]); - skinny64_LFSR3(state->TK3[0]); - skinny64_LFSR3(state->TK3[1]); } -void forkskinny_64_192_encrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) -{ - forkskinny_64_192_state_t state; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input + 4); - state.S[3] = be_load_word16(input + 6); - - /* Run all of the rounds before the forking point */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_BEFORE; ++round) { - forkskinny_64_192_round(&state, round); - } - - /* Determine which output blocks we need */ - if 
(output_left && output_right) { - /* We need both outputs so save the state at the forking point */ - uint16_t F[4]; - F[0] = state.S[0]; - F[1] = state.S[1]; - F[2] = state.S[2]; - F[3] = state.S[3]; - - /* Generate the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - - /* Restore the state at the forking point */ - state.S[0] = F[0]; - state.S[1] = F[1]; - state.S[2] = F[2]; - state.S[3] = F[3]; - } - if (output_left) { - /* Generate the left output block */ - state.S[0] ^= 0x1249U; /* Branching constant */ - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - } else { - /* We only need the right output block */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&state, round); - } - be_store_word16(output_right, state.S[0]); - be_store_word16(output_right + 2, state.S[1]); - be_store_word16(output_right + 4, state.S[2]); - be_store_word16(output_right + 6, state.S[3]); - } -} - -/** - * \brief Applies one round of ForkSkinny-64-192 in reverse. - * - * \param state State to apply the round to. - * \param round Number of the round to apply. 
- */ -static void forkskinny_64_192_inv_round - (forkskinny_64_192_state_t *state, unsigned round) +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last) { uint16_t s0, s1, s2, s3, temp; uint8_t rc; @@ -854,42 +471,45 @@ static void forkskinny_64_192_inv_round s2 = state->S[2]; s3 = state->S[3]; - /* Permute TK1, TK2, and TK3 for the next round */ - skinny64_inv_LFSR2(state->TK2[0]); - skinny64_inv_LFSR2(state->TK2[1]); - skinny64_inv_LFSR3(state->TK3[0]); - skinny64_inv_LFSR3(state->TK3[1]); - skinny64_inv_permute_tk(state->TK1); - skinny64_inv_permute_tk(state->TK2); - skinny64_inv_permute_tk(state->TK3); - - /* Inverse mix of the columns */ - temp = s0; - s0 = s1; - s1 = s2; - s2 = s3; - s3 = temp ^ s2; - s2 ^= s0; - s1 ^= s2; - - /* Shift the cells in the rows left */ - s1 = leftRotate4_16(s1); - s2 = leftRotate8_16(s2); - s3 = leftRotate12_16(s3); - - /* XOR the round constant and the subkey for this round */ - rc = RC[round]; - s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ - ((rc & 0x0F) << 12) ^ 0x0020; - s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ - ((rc & 0x70) << 8); - s2 ^= 0x2000; - - /* Apply the inverse of the S-box to all cells in the state */ - skinny64_inv_sbox(s0); - skinny64_inv_sbox(s1); - skinny64_inv_sbox(s2); - skinny64_inv_sbox(s3); + /* Perform all requested rounds */ + while (first > last) { + /* Permute TK1, TK2, and TK3 for the next round */ + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + + /* Inverse mix of the columns */ + temp = s0; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = temp ^ s2; + s2 ^= s0; + s1 ^= s2; + + /* Shift the cells in the rows left */ + s1 = leftRotate4_16(s1); + s2 = leftRotate8_16(s2); + s3 = leftRotate12_16(s3); + + /* XOR the round constant and the subkey for this round */ + rc = RC[--first]; + s0 ^= state->TK1[0] ^ state->TK2[0] ^ state->TK3[0] ^ + ((rc & 0x0F) << 12) ^ 0x0020; + s1 ^= state->TK1[1] ^ state->TK2[1] ^ state->TK3[1] ^ + ((rc & 0x70) << 8); + s2 ^= 0x2000; + + /* Apply the inverse of the S-box to all cells in the state */ + skinny64_inv_sbox(s0); + skinny64_inv_sbox(s1); + skinny64_inv_sbox(s2); + skinny64_inv_sbox(s3); + } /* Save the local variables back to the state */ state->S[0] = s0; @@ -898,91 +518,74 @@ static void forkskinny_64_192_inv_round state->S[3] = s3; } -void forkskinny_64_192_decrypt - (const unsigned char key[24], unsigned char *output_left, - unsigned char *output_right, const unsigned char *input) +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds) { - forkskinny_64_192_state_t state; - forkskinny_64_192_state_t fstate; - unsigned round; - - /* Unpack the tweakey and the input */ - state.TK1[0] = be_load_word16(key); - state.TK1[1] = be_load_word16(key + 2); - state.TK1[2] = be_load_word16(key + 4); - state.TK1[3] = be_load_word16(key + 6); - state.TK2[0] = be_load_word16(key + 8); - state.TK2[1] = be_load_word16(key + 10); - state.TK2[2] = be_load_word16(key + 12); - state.TK2[3] = be_load_word16(key + 14); - state.TK3[0] = be_load_word16(key + 16); - state.TK3[1] = be_load_word16(key + 18); - state.TK3[2] = be_load_word16(key + 20); - state.TK3[3] = be_load_word16(key + 22); - state.S[0] = be_load_word16(input); - state.S[1] = be_load_word16(input + 2); - state.S[2] = be_load_word16(input 
+ 4); - state.S[3] = be_load_word16(input + 6); - - /* Fast-forward the tweakey to the end of the key schedule */ - for (round = 0; round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); ++round) { - skinny64_permute_tk(state.TK1); - skinny64_permute_tk(state.TK2); - skinny64_permute_tk(state.TK3); - skinny64_LFSR2(state.TK2[0]); - skinny64_LFSR2(state.TK2[1]); - skinny64_LFSR3(state.TK3[0]); - skinny64_LFSR3(state.TK3[1]); + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_permute_tk() calls in the early stages. During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR2(state->TK2[2]); + skinny64_LFSR2(state->TK2[3]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + skinny64_LFSR3(state->TK3[2]); + skinny64_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Perform the "after" rounds on the input to get back - * to the forking point in the cipher */ - for (round = (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER * 2); - round > (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); --round) { - forkskinny_64_192_inv_round(&state, round - 1); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_permute_tk(state->TK1); + skinny64_permute_tk(state->TK2); + skinny64_permute_tk(state->TK3); + skinny64_LFSR2(state->TK2[0]); + skinny64_LFSR2(state->TK2[1]); + skinny64_LFSR3(state->TK3[0]); + skinny64_LFSR3(state->TK3[1]); + --rounds; } +} - /* Remove the branching constant */ - state.S[0] ^= 0x1249U; - state.S[1] ^= 0x36daU; - state.S[2] ^= 0x5b7fU; - state.S[3] ^= 0xec81U; - - /* Roll the tweakey back another "after" rounds */ - for (round = 0; round < FORKSKINNY_64_192_ROUNDS_AFTER; ++round) { - skinny64_inv_LFSR2(state.TK2[0]); - skinny64_inv_LFSR2(state.TK2[1]); - skinny64_inv_LFSR3(state.TK3[0]); - skinny64_inv_LFSR3(state.TK3[1]); - skinny64_inv_permute_tk(state.TK1); - skinny64_inv_permute_tk(state.TK2); - skinny64_inv_permute_tk(state.TK3); +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds) +{ + unsigned temp; + + /* The tweak permutation repeats every 16 rounds so we can avoid + * some skinny64_inv_permute_tk() calls in the early stages. 
During + * the 16 rounds, the LFSR will be applied 8 times to every word */ + while (rounds >= 16) { + for (temp = 0; temp < 8; ++temp) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR2(state->TK2[2]); + skinny64_inv_LFSR2(state->TK2[3]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_LFSR3(state->TK3[2]); + skinny64_inv_LFSR3(state->TK3[3]); + } + rounds -= 16; } - /* Save the state and the tweakey at the forking point */ - fstate = state; - - /* Generate the left output block after another "before" rounds */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; round > 0; --round) { - forkskinny_64_192_inv_round(&state, round - 1); - } - be_store_word16(output_left, state.S[0]); - be_store_word16(output_left + 2, state.S[1]); - be_store_word16(output_left + 4, state.S[2]); - be_store_word16(output_left + 6, state.S[3]); - - /* Generate the right output block by going forward "after" - * rounds from the forking point */ - for (round = FORKSKINNY_64_192_ROUNDS_BEFORE; - round < (FORKSKINNY_64_192_ROUNDS_BEFORE + - FORKSKINNY_64_192_ROUNDS_AFTER); ++round) { - forkskinny_64_192_round(&fstate, round); + /* Handle the left-over rounds */ + while (rounds > 0) { + skinny64_inv_LFSR2(state->TK2[0]); + skinny64_inv_LFSR2(state->TK2[1]); + skinny64_inv_LFSR3(state->TK3[0]); + skinny64_inv_LFSR3(state->TK3[1]); + skinny64_inv_permute_tk(state->TK1); + skinny64_inv_permute_tk(state->TK2); + skinny64_inv_permute_tk(state->TK3); + --rounds; } - be_store_word16(output_right, fstate.S[0]); - be_store_word16(output_right + 2, fstate.S[1]); - be_store_word16(output_right + 4, fstate.S[2]); - be_store_word16(output_right + 6, fstate.S[3]); } + +#endif /* !__AVR__ */ diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.h index 0c1a707..e3014d3 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-forkskinny.h @@ -23,6 +23,8 @@ #ifndef LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H +#include "internal-util.h" + /** * \file internal-forkskinny.h * \brief ForkSkinny block cipher family. @@ -39,6 +41,158 @@ extern "C" { #endif /** + * \brief State information for ForkSkinny-128-256. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_256_state_t; + +/** + * \brief State information for ForkSkinny-128-384. + */ +typedef struct +{ + uint32_t TK1[4]; /**< First part of the tweakey */ + uint32_t TK2[4]; /**< Second part of the tweakey */ + uint32_t TK3[4]; /**< Third part of the tweakey */ + uint32_t S[4]; /**< Current block state */ + +} forkskinny_128_384_state_t; + +/** + * \brief State information for ForkSkinny-64-192. + */ +typedef struct +{ + uint16_t TK1[4]; /**< First part of the tweakey */ + uint16_t TK2[4]; /**< Second part of the tweakey */ + uint16_t TK3[4]; /**< Third part of the tweakey */ + uint16_t S[4]; /**< Current block state */ + +} forkskinny_64_192_state_t; + +/** + * \brief Applies several rounds of ForkSkinny-128-256. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. 
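The first/last arguments of these round functions form a half-open range: forkskinny_64_192_rounds(&st, first, last) applies rounds first through last - 1, while the inverse variant takes the same pair swapped and consumes RC[--first] as it steps backwards. A minimal round-trip sketch of the convention, using the 64-192 variant (loading the tweakey and state words is elided; the property holds because the inverse rounds also unwind the tweakey schedule):

    forkskinny_64_192_state_t st;
    /* ... fill st.TK1/TK2/TK3 and st.S as in the encrypt path ... */
    forkskinny_64_192_rounds(&st, 0, FORKSKINNY_64_192_ROUNDS_BEFORE);
    forkskinny_64_192_inv_rounds(&st, FORKSKINNY_64_192_ROUNDS_BEFORE, 0);
    /* st.S and st.TK1/TK2/TK3 now hold their starting values again */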
+ */ +void forkskinny_128_256_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-256 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_256_inv_rounds + (forkskinny_128_256_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_256_forward_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-256. + * + * \param state Points to the ForkSkinny-128-256 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_256_reverse_tk + (forkskinny_128_256_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-128-384. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + */ +void forkskinny_128_384_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-128-384 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_128_384_inv_rounds + (forkskinny_128_384_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_128_384_forward_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-128-384. + * + * \param state Points to the ForkSkinny-128-384 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_128_384_reverse_tk + (forkskinny_128_384_state_t *state, unsigned rounds); + +/** + * \brief Applies several rounds of ForkSkinny-64-192. + * + * \param state State to apply the rounds to. + * \param first First round to apply. + * \param last Last round to apply plus 1. + * + * Note: The cells of each row are ordered in big-endian nibble order + * so it is simplest to manage the rows in big-endian byte order. + */ +void forkskinny_64_192_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Applies several rounds of ForkSkinny-64-192 in reverse. + * + * \param state State to apply the rounds to. + * \param first First round to apply plus 1. + * \param last Last round to apply. + */ +void forkskinny_64_192_inv_rounds + (forkskinny_64_192_state_t *state, unsigned first, unsigned last); + +/** + * \brief Forwards the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to forward by. + */ +void forkskinny_64_192_forward_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** + * \brief Reverses the tweakey for ForkSkinny-64-192. + * + * \param state Points to the ForkSkinny-64-192 state. + * \param rounds Number of rounds to reverse by. + */ +void forkskinny_64_192_reverse_tk + (forkskinny_64_192_state_t *state, unsigned rounds); + +/** * \brief Encrypts a block of plaintext with ForkSkinny-128-256. 
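The while (rounds >= 16) fast path in the forward_tk/reverse_tk implementations is valid because the SKINNY tweakey cell permutation has order 16: a full cycle of 16 rounds returns every cell to its home position, leaving only the LFSR updates, and since each half of the tweakey occupies the top rows only every other round, each word receives exactly 8 LFSR steps per cycle. A standalone check of the order, assuming the PT table from the SKINNY specification (cell i of the permuted tweakey is taken from cell PT[i]):

    #include <stdio.h>

    static const unsigned char PT[16] =
        {9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7};

    int main(void)
    {
        unsigned char cells[16], next[16];
        int i, order = 0, identity;
        for (i = 0; i < 16; ++i)
            cells[i] = (unsigned char)i;
        do {
            for (i = 0; i < 16; ++i)
                next[i] = cells[PT[i]];
            for (i = 0; i < 16; ++i)
                cells[i] = next[i];
            ++order;
            identity = 1;
            for (i = 0; i < 16; ++i)
                identity &= (cells[i] == i);
        } while (!identity);
        printf("PT order = %d\n", order); /* prints 16 */
        return 0;
    }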
* * \param key 256-bit tweakey for ForkSkinny-128-256. diff --git a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-skinnyutil.h b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-skinnyutil.h +++ b/forkae/Implementations/crypto_aead/saefforkskinnyb128t256n120v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/grain128.h b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/grain128.h index c8d6de9..ef16440 100644 --- a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/grain128.h +++ b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/grain128.h @@ -33,6 +33,8 @@ * combination of a 128-bit linear feedback shift register (LFSR) and a * 128-bit non-linear feedback shift register (NFSR). It is a member of * the Grain family of stream ciphers. 
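A note on the skinny128_permute_tk_half macros added to internal-skinnyutil.h above: the full skinny128_permute_tk swaps the two halves of the tweakey each round, and the half macros skip that swap, so one half-step is equivalent to advancing that half by two applications of the full permutation. A property-check sketch under that reading (the harness is illustrative, not library code):

    #include "internal-skinnyutil.h"

    static int check_half_step(const uint32_t tk_in[4])
    {
        uint32_t full[4] = {tk_in[0], tk_in[1], tk_in[2], tk_in[3]};
        uint32_t half[4] = {tk_in[0], tk_in[1], tk_in[2], tk_in[3]};
        skinny128_permute_tk(full);
        skinny128_permute_tk(full);                  /* two full rounds */
        skinny128_permute_tk_half(half[0], half[1]); /* top half, 2 rounds */
        skinny128_permute_tk_half(half[2], half[3]); /* bottom half, 2 rounds */
        return full[0] == half[0] && full[1] == half[1] &&
               full[2] == half[2] && full[3] == half[3]; /* expect 1 */
    }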
+ * + * References: https://grain-128aead.github.io/ */ #ifdef __cplusplus diff --git a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128-avr.S b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128-avr.S new file mode 100644 index 0000000..007e486 --- /dev/null +++ b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128-avr.S @@ -0,0 +1,1947 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global grain128_core + .type grain128_core, @function +grain128_core: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r26,Z + ldd r27,Z+1 + ldd r28,Z+2 + ldd r29,Z+3 + ldd r2,Z+4 + ldd r3,Z+5 + ldd r4,Z+6 + ldd r5,Z+7 + ldd r6,Z+8 + ldd r7,Z+9 + ldd r8,Z+10 + ldd r9,Z+11 + ldd r10,Z+12 + ldd r11,Z+13 + ldd r12,Z+14 + ldd r13,Z+15 + eor r20,r26 + eor r21,r27 + eor r22,r28 + eor r23,r29 + mov r15,r26 + mov r24,r27 + mov r25,r28 + mov r1,r29 + mov r14,r5 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + eor r20,r14 + eor r21,r15 + eor r22,r24 + eor r23,r25 + mov r15,r2 + mov r24,r3 + mov r25,r4 + mov r1,r5 + mov r14,r9 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + eor r20,r14 + eor r21,r15 + eor r22,r24 + eor r23,r25 + mov r15,r6 + mov r24,r7 + mov r25,r8 + mov r1,r9 + mov r14,r13 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + eor r20,r14 + eor r21,r15 + eor r22,r24 + eor r23,r25 + mov r25,r6 + mov r1,r7 + mov r14,r11 + mov r15,r12 + mov r24,r13 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + eor r20,r15 + eor r21,r24 + eor r22,r25 + eor r23,r1 + eor r20,r10 + eor r21,r11 + eor r22,r12 + eor r23,r13 + st Z,r2 + std Z+1,r3 + std Z+2,r4 + std Z+3,r5 + std Z+4,r6 + std Z+5,r7 + std Z+6,r8 + std Z+7,r9 + std Z+8,r10 + std Z+9,r11 + std Z+10,r12 + std Z+11,r13 + std Z+12,r20 + std Z+13,r21 + std Z+14,r22 + std Z+15,r23 + eor r16,r26 + eor r17,r27 + eor r18,r28 + eor r19,r29 + ldd r26,Z+16 + ldd r27,Z+17 + ldd r28,Z+18 + ldd r29,Z+19 + ldd r2,Z+20 + ldd r3,Z+21 + ldd r4,Z+22 + ldd r5,Z+23 + ldd r6,Z+24 + ldd r7,Z+25 + ldd r8,Z+26 + ldd r9,Z+27 + ldd r10,Z+28 + ldd r11,Z+29 + ldd r12,Z+30 + ldd r13,Z+31 + eor r16,r26 + eor r17,r27 + eor r18,r28 + eor r19,r29 + mov r14,r26 + movw r20,r2 + movw r22,r4 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + eor r16,r7 + eor r17,r8 + eor r18,r9 + eor r19,r2 + mov r14,r6 + movw r20,r10 + movw r22,r12 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + eor r16,r10 + eor r17,r11 + eor r18,r12 + eor r19,r13 + mov r21,r26 + mov r22,r27 + mov r23,r28 + mov r14,r29 + mov r20,r5 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r25,r6 + mov r15,r7 + mov r1,r8 + mov r0,r9 + mov r24,r13 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r21,r25 + and r22,r15 + and r23,r1 + and r14,r0 + eor r16,r21 + eor r17,r22 + 
eor r18,r23 + eor r19,r14 + movw r22,r26 + mov r14,r28 + movw r20,r4 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r15,r26 + mov r1,r27 + mov r0,r28 + movw r24,r4 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + and r21,r24 + and r22,r25 + and r23,r15 + and r14,r1 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + mov r23,r26 + mov r14,r27 + mov r20,r3 + mov r21,r4 + mov r22,r5 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r1,r26 + mov r0,r27 + mov r24,r3 + mov r25,r4 + mov r15,r5 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r21,r25 + and r22,r15 + and r23,r1 + and r14,r0 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + mov r14,r26 + movw r20,r2 + movw r22,r4 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r0,r2 + movw r24,r6 + mov r15,r8 + mov r1,r9 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r21,r25 + and r22,r15 + and r23,r1 + and r14,r0 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + mov r20,r9 + mov r21,r2 + mov r22,r3 + mov r23,r4 + and r20,r8 + and r21,r9 + and r22,r2 + and r23,r3 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + mov r14,r2 + movw r20,r6 + movw r22,r8 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + mov r25,r6 + mov r15,r7 + mov r1,r8 + mov r0,r9 + mov r24,r13 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r20,r25 + and r21,r15 + and r22,r1 + and r23,r0 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + mov r21,r6 + mov r22,r7 + mov r23,r8 + mov r14,r9 + mov r20,r13 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + lsl r20 + rol r21 + rol r22 + rol r23 + rol r14 + mov r1,r6 + mov r0,r7 + mov r24,r11 + mov r25,r12 + mov r15,r13 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r21,r25 + and r22,r15 + and r23,r1 + and r14,r0 + eor r16,r21 + eor r17,r22 + eor r18,r23 + eor r19,r14 + mov r23,r26 + mov r14,r27 + mov r20,r3 + mov r21,r4 + mov r22,r5 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + and r20,r3 + and r21,r4 + and r22,r5 + and r23,r26 + mov r0,r26 + movw r24,r2 + mov r15,r4 + mov r1,r5 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + and r20,r25 + and r21,r15 + and r22,r1 + and r23,r0 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + mov r21,r6 + mov r22,r7 + mov r23,r8 + mov r14,r9 + mov r20,r13 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + lsr r14 + ror r23 + ror r22 + ror r21 + ror r20 + mov r15,r6 + mov r1,r7 + mov r0,r8 + movw r24,r12 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + lsr r0 + ror r1 + ror r15 + ror r25 + ror r24 + and r20,r24 + and r21,r25 + and r22,r15 + and r23,r1 + mov r1,r6 + mov r0,r7 + mov r24,r11 + mov r25,r12 + mov r15,r13 + lsl r24 + rol r25 + rol r15 + rol r1 + rol r0 + lsl r24 + rol r25 + 
rol r15 + rol r1 + rol r0 + and r20,r25 + and r21,r15 + and r22,r1 + and r23,r0 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + mov r20,r11 + mov r21,r12 + mov r22,r13 + mov r23,r6 + mov r1,r6 + movw r14,r10 + movw r24,r12 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + lsl r14 + rol r15 + rol r24 + rol r25 + rol r1 + and r20,r15 + and r21,r24 + and r22,r25 + and r23,r1 + mov r1,r6 + movw r14,r10 + movw r24,r12 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + and r20,r14 + and r21,r15 + and r22,r24 + and r23,r25 + mov r1,r6 + movw r14,r10 + movw r24,r12 + lsr r1 + ror r25 + ror r24 + ror r15 + ror r14 + and r20,r14 + and r21,r15 + and r22,r24 + and r23,r25 + eor r16,r20 + eor r17,r21 + eor r18,r22 + eor r19,r23 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r16 + std Z+29,r17 + std Z+30,r18 + std Z+31,r19 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size grain128_core, .-grain128_core + + .text +.global grain128_preoutput + .type grain128_preoutput, @function +grain128_preoutput: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ldd r20,Z+16 + ldd r21,Z+17 + ldd r26,Z+18 + ldd r18,Z+22 + ldd r19,Z+23 + lsl r18 + rol r19 + rol r20 + rol r21 + rol r26 + lsl r18 + rol r19 + rol r20 + rol r21 + rol r26 + lsl r18 + rol r19 + rol r20 + rol r21 + rol r26 + lsl r18 + rol r19 + rol r20 + rol r21 + rol r26 + ldd r18,Z+27 + ldd r28,Z+28 + ldd r29,Z+29 + ldd r2,Z+30 + ldd r3,Z+31 + lsr r18 + ror r3 + ror r2 + ror r29 + ror r28 + ld r4,Z + ldd r5,Z+1 + ldd r6,Z+2 + ldd r7,Z+3 + ldd r8,Z+4 + ldd r9,Z+5 + ldd r10,Z+6 + ldd r11,Z+7 + mov r23,r4 + mov r24,r5 + mov r25,r6 + mov r22,r11 + and r22,r19 + and r23,r20 + and r24,r21 + and r25,r26 + movw r14,r4 + mov r18,r6 + movw r12,r10 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + mov r1,r4 + mov r0,r5 + mov r16,r9 + mov r17,r10 + mov r27,r11 + lsl r16 + rol r17 + rol r27 + rol r1 + rol r0 + lsl r16 + rol r17 + rol r27 + rol r1 + rol r0 + lsl r16 + rol r17 + rol r27 + rol r1 + rol r0 + lsl r16 + rol r17 + rol r27 + rol r1 + rol r0 + and r12,r17 + and r13,r27 + and r14,r1 + and r15,r0 + eor r22,r12 + eor r23,r13 + eor r24,r14 + eor r25,r15 + ldd r4,Z+8 + ldd r5,Z+9 + ldd r6,Z+10 + ldd r7,Z+11 + movw r14,r8 + mov r18,r10 + movw r12,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + and r13,r28 + and r14,r29 + and r15,r2 + and r18,r3 + eor r22,r13 + eor r23,r14 + eor r24,r15 + eor r25,r18 + mov r18,r8 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + lsl r12 + rol r13 + rol r14 + rol r15 + rol r18 + ldd r8,Z+12 + ldd r9,Z+13 + ldd r10,Z+14 + ldd r11,Z+15 + mov r27,r4 + mov r1,r5 + mov r0,r6 + movw r16,r10 + lsr r0 + 
ror r1 + ror r27 + ror r17 + ror r16 + and r13,r16 + and r14,r17 + and r15,r27 + and r18,r1 + eor r22,r13 + eor r23,r14 + eor r24,r15 + eor r25,r18 + and r19,r28 + and r20,r29 + and r21,r2 + and r26,r3 + mov r18,r4 + movw r12,r8 + movw r14,r10 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + and r19,r12 + and r20,r13 + and r21,r14 + and r26,r15 + eor r22,r19 + eor r23,r20 + eor r24,r21 + eor r25,r26 + mov r18,r4 + movw r12,r8 + movw r14,r10 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + lsr r18 + ror r15 + ror r14 + ror r13 + ror r12 + eor r22,r12 + eor r23,r13 + eor r24,r14 + eor r25,r15 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r26,Z+20 + ldd r27,Z+21 + ldd r28,Z+22 + ldd r29,Z+23 + ldd r4,Z+24 + ldd r5,Z+25 + ldd r6,Z+26 + ldd r7,Z+27 + ldd r8,Z+28 + ldd r9,Z+29 + ldd r10,Z+30 + ldd r11,Z+31 + mov r3,r18 + mov r12,r19 + mov r13,r20 + mov r14,r21 + mov r2,r29 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + eor r22,r3 + eor r23,r12 + eor r24,r13 + eor r25,r14 + movw r12,r18 + mov r14,r20 + movw r2,r28 + lsr r14 + ror r13 + ror r12 + ror r3 + ror r2 + eor r22,r2 + eor r23,r3 + eor r24,r12 + eor r25,r13 + mov r3,r26 + mov r12,r27 + mov r13,r28 + mov r14,r29 + mov r2,r7 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + eor r22,r3 + eor r23,r12 + eor r24,r13 + eor r25,r14 + movw r12,r26 + mov r14,r28 + movw r2,r6 + lsr r14 + ror r13 + ror r12 + ror r3 + ror r2 + lsr r14 + ror r13 + ror r12 + ror r3 + ror r2 + lsr r14 + ror r13 + ror r12 + ror r3 + ror r2 + eor r22,r2 + eor r23,r3 + eor r24,r12 + eor r25,r13 + eor r22,r4 + eor r23,r5 + eor r24,r6 + eor r25,r7 + movw r12,r4 + mov r14,r6 + movw r2,r10 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + eor r22,r3 + eor r23,r12 + eor r24,r13 + eor r25,r14 + mov r14,r4 + movw r2,r8 + movw r12,r10 + lsl r2 + rol r3 + rol r12 + rol r13 + rol r14 + eor r22,r3 + eor r23,r12 + eor r24,r13 + eor r25,r14 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size grain128_preoutput, .-grain128_preoutput + + .text +.global grain128_swap_word32 + .type grain128_swap_word32, @function +grain128_swap_word32: + movw r30,r24 +.L__stack_usage = 2 + ld r25,Z + ldd r24,Z+1 + ldd r23,Z+2 + ldd r22,Z+3 + mov r18,r22 + andi r18,85 + lsl r18 + lsr r22 + andi r22,85 + or r22,r18 + mov r18,r22 + andi r18,51 + lsl r18 + lsl r18 + lsr r22 + lsr r22 + andi r22,51 + or r22,r18 + swap r22 + mov r18,r23 + andi r18,85 + lsl r18 + lsr r23 + andi r23,85 + or r23,r18 + mov r18,r23 + andi r18,51 + lsl r18 + lsl r18 + lsr r23 + lsr r23 + andi r23,51 + or r23,r18 + swap r23 + mov r18,r24 + andi r18,85 + lsl r18 + lsr r24 + andi r24,85 + or r24,r18 + mov r18,r24 + andi r18,51 + lsl r18 + lsl r18 + lsr r24 + lsr r24 + andi r24,51 + or r24,r18 + swap r24 + mov r18,r25 + andi r18,85 + lsl r18 + lsr r25 + andi r25,85 + or r25,r18 + mov r18,r25 + andi r18,51 + lsl r18 + lsl r18 + lsr r25 + lsr r25 + andi r25,51 + or r25,r18 + swap r25 + ret + .size grain128_swap_word32, .-grain128_swap_word32 + + .text +.global grain128_compute_tag + .type grain128_compute_tag, @function +grain128_compute_tag: + movw r30,r24 +.L__stack_usage = 2 + ldd 
r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r0,Z+40 + eor r18,r0 + ldd r0,Z+41 + eor r19,r0 + ldd r0,Z+42 + eor r20,r0 + ldd r0,Z+43 + eor r21,r0 + ldd r0,Z+44 + eor r22,r0 + ldd r0,Z+45 + eor r23,r0 + ldd r0,Z+46 + eor r26,r0 + ldd r0,Z+47 + eor r27,r0 + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + mov r24,r18 + andi r24,85 + lsl r24 + lsr r18 + andi r18,85 + or r18,r24 + mov r24,r18 + andi r24,51 + lsl r24 + lsl r24 + lsr r18 + lsr r18 + andi r18,51 + or r18,r24 + swap r18 + mov r24,r19 + andi r24,85 + lsl r24 + lsr r19 + andi r19,85 + or r19,r24 + mov r24,r19 + andi r24,51 + lsl r24 + lsl r24 + lsr r19 + lsr r19 + andi r19,51 + or r19,r24 + swap r19 + mov r24,r20 + andi r24,85 + lsl r24 + lsr r20 + andi r20,85 + or r20,r24 + mov r24,r20 + andi r24,51 + lsl r24 + lsl r24 + lsr r20 + lsr r20 + andi r20,51 + or r20,r24 + swap r20 + mov r24,r21 + andi r24,85 + lsl r24 + lsr r21 + andi r21,85 + or r21,r24 + mov r24,r21 + andi r24,51 + lsl r24 + lsl r24 + lsr r21 + lsr r21 + andi r21,51 + or r21,r24 + swap r21 + mov r24,r22 + andi r24,85 + lsl r24 + lsr r22 + andi r22,85 + or r22,r24 + mov r24,r22 + andi r24,51 + lsl r24 + lsl r24 + lsr r22 + lsr r22 + andi r22,51 + or r22,r24 + swap r22 + mov r24,r23 + andi r24,85 + lsl r24 + lsr r23 + andi r23,85 + or r23,r24 + mov r24,r23 + andi r24,51 + lsl r24 + lsl r24 + lsr r23 + lsr r23 + andi r23,51 + or r23,r24 + swap r23 + mov r24,r26 + andi r24,85 + lsl r24 + lsr r26 + andi r26,85 + or r26,r24 + mov r24,r26 + andi r24,51 + lsl r24 + lsl r24 + lsr r26 + lsr r26 + andi r26,51 + or r26,r24 + swap r26 + mov r24,r27 + andi r24,85 + lsl r24 + lsr r27 + andi r27,85 + or r27,r24 + mov r24,r27 + andi r24,51 + lsl r24 + lsl r24 + lsr r27 + lsr r27 + andi r27,51 + or r27,r24 + swap r27 + std Z+48,r27 + std Z+49,r26 + std Z+50,r23 + std Z+51,r22 + std Z+52,r21 + std Z+53,r20 + std Z+54,r19 + std Z+55,r18 + ret + .size grain128_compute_tag, .-grain128_compute_tag + + .text +.global grain128_interleave + .type grain128_interleave, @function +grain128_interleave: + movw r30,r24 +.L__stack_usage = 2 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + movw r22,r18 + movw r26,r20 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,17 + andi r23,17 + andi r26,17 + andi r27,17 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r0,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r0 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,3 + andi r23,3 + andi r26,3 + andi r27,3 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r0,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r0 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 
+ ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,15 + mov r23,r1 + andi r26,15 + mov r27,r1 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + st Z,r20 + std Z+1,r21 + std Z+2,r18 + std Z+3,r19 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + movw r22,r18 + movw r26,r20 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,17 + andi r23,17 + andi r26,17 + andi r27,17 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r0,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r0 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,3 + andi r23,3 + andi r26,3 + andi r27,3 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r0,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r0 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,15 + mov r23,r1 + andi r26,15 + mov r27,r1 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + std Z+4,r20 + std Z+5,r21 + std Z+6,r18 + std Z+7,r19 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + movw r22,r18 + movw r26,r20 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,17 + andi r23,17 + andi r26,17 + andi r27,17 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r0,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r0 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,3 + andi r23,3 + andi r26,3 + andi r27,3 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r0,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r0 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + 
mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,15 + mov r23,r1 + andi r26,15 + mov r27,r1 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + std Z+8,r20 + std Z+9,r21 + std Z+10,r18 + std Z+11,r19 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + movw r22,r18 + movw r26,r20 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,17 + andi r23,17 + andi r26,17 + andi r27,17 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r0,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + lsl r22 + rol r23 + rol r26 + rol r27 + rol r0 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r0 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,3 + andi r23,3 + andi r26,3 + andi r27,3 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r0,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + lsr r27 + ror r26 + ror r23 + ror r22 + ror r0 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r0 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + movw r22,r18 + movw r26,r20 + mov r22,r23 + mov r23,r26 + mov r26,r27 + mov r27,r1 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + lsr r27 + ror r26 + ror r23 + ror r22 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + andi r22,15 + mov r23,r1 + andi r26,15 + mov r27,r1 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + mov r27,r26 + mov r26,r23 + mov r23,r22 + mov r22,r1 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + lsl r22 + rol r23 + rol r26 + rol r27 + eor r18,r22 + eor r19,r23 + eor r20,r26 + eor r21,r27 + std Z+12,r20 + std Z+13,r21 + std Z+14,r18 + std Z+15,r19 + ret + .size grain128_interleave, .-grain128_interleave + +#endif diff --git a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.c b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.c index d0d71ea..c98376d 100644 --- a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.c +++ b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.c @@ -26,14 +26,9 @@ #define GWORD(a, b, start_bit) \ (((a) << ((start_bit) % 32)) ^ ((b) >> (32 - ((start_bit) % 32)))) -/** - * \brief Performs 32 rounds of Grain-128 in parallel. - * - * \param state Grain-128 state. - * \param x 32 bits of input to be incorporated into the LFSR state, or zero. - * \param x2 Another 32 bits to be incorporated into the NFSR state, or zero. 
- */ -static void grain128_core +#if !defined(__AVR__) + +void grain128_core (grain128_state_t *state, uint32_t x, uint32_t x2) { uint32_t s0, s1, s2, s3; @@ -67,7 +62,7 @@ static void grain128_core /* Perform the NFSR feedback algorithm from the specification: * * b'[i] = b[i + 1] - * b'[127] = s'[127] ^ b[0] ^ b[26] ^ b[56] ^ b[91] ^ b[96] + * b'[127] = s[0] ^ b[0] ^ b[26] ^ b[56] ^ b[91] ^ b[96] * ^ (b[3] & b[67]) ^ (b[11] & b[13]) ^ (b[17] & b[18]) * ^ (b[27] & b[59]) ^ (b[40] & b[48]) ^ (b[61] & b[65]) * ^ (b[68] & b[84]) ^ (b[22] & b[24] & b[25]) @@ -106,14 +101,19 @@ static void grain128_core state->nfsr[3] = x2; } -/** - * \brief Generates 32 bits of pre-output data. - * - * \param state Grain-128 state. - * - * \return The generated 32 bits of pre-output data. - */ -static uint32_t grain128_preoutput(const grain128_state_t *state) +#define grain128_preoutput grain128_preoutput_inner +#define grain128_preoutput_setup(state) grain128_preoutput((state)) + +#else /* __AVR__ */ + +/* For some reason, the AVR assembly preoutput doesn't work for key setup + * but does work everywhere else. Investigate and fix this later. */ +uint32_t grain128_preoutput(const grain128_state_t *state); +#define grain128_preoutput_setup(state) grain128_preoutput_inner((state)) + +#endif /* __AVR__ */ + +uint32_t grain128_preoutput_inner(const grain128_state_t *state) { uint32_t s0, s1, s2, s3; uint32_t b0, b1, b2, b3; @@ -170,12 +170,37 @@ static uint32_t grain128_preoutput(const grain128_state_t *state) (_y) = (((_y) & (mask)) << (shift)) | (((_y) >> (shift)) & (mask)); \ } while (0) +#if defined(__AVR__) +#define GRAIN128_ASM_HELPERS 1 +#endif + +#if defined(GRAIN128_ASM_HELPERS) + +/** + * \brief Loads a 32-bit word and swaps it from big-endian bit order + * into little-endian bit order. + * + * \param data Points to the word to be loaded. + * \return Little-endian version of the 32-bit word at \a data. + */ +uint32_t grain128_swap_word32(const unsigned char *data); + +/** + * \brief Interleaves the bits in a 16-byte keystream block to separate + * out the even and odd bits. + * + * \param ks Points to the keystream block. 
+ */ +void grain128_interleave(unsigned char *ks); + +#endif + void grain128_setup (grain128_state_t *state, const unsigned char *key, const unsigned char *nonce) { uint32_t k[4]; - unsigned round; + uint8_t round; /* Internally, the Grain-128 stream cipher uses big endian bit * order, but the Grain-128AEAD specification for NIST uses little @@ -187,26 +212,33 @@ void grain128_setup * P = [7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8 * 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24] */ + #if defined(GRAIN128_ASM_HELPERS) #define SWAP_BITS(out, in) \ do { \ - uint32_t tmp = (in); \ + (out) = grain128_swap_word32((in)); \ + } while (0) + #else + #define SWAP_BITS(out, in) \ + do { \ + uint32_t tmp = be_load_word32((in)); \ bit_permute_step_simple(tmp, 0x55555555, 1); \ bit_permute_step_simple(tmp, 0x33333333, 2); \ bit_permute_step_simple(tmp, 0x0f0f0f0f, 4); \ (out) = tmp; \ } while (0) + #endif /* Initialize the LFSR state with the nonce and padding */ - SWAP_BITS(state->lfsr[0], be_load_word32(nonce)); - SWAP_BITS(state->lfsr[1], be_load_word32(nonce + 4)); - SWAP_BITS(state->lfsr[2], be_load_word32(nonce + 8)); + SWAP_BITS(state->lfsr[0], nonce); + SWAP_BITS(state->lfsr[1], nonce + 4); + SWAP_BITS(state->lfsr[2], nonce + 8); state->lfsr[3] = 0xFFFFFFFEU; /* pad with all-1s and a terminating 0 */ /* Initialize the NFSR state with the key */ - SWAP_BITS(k[0], be_load_word32(key)); - SWAP_BITS(k[1], be_load_word32(key + 4)); - SWAP_BITS(k[2], be_load_word32(key + 8)); - SWAP_BITS(k[3], be_load_word32(key + 12)); + SWAP_BITS(k[0], key); + SWAP_BITS(k[1], key + 4); + SWAP_BITS(k[2], key + 8); + SWAP_BITS(k[3], key + 12); state->nfsr[0] = k[0]; state->nfsr[1] = k[1]; state->nfsr[2] = k[2]; @@ -215,7 +247,7 @@ void grain128_setup /* Perform 256 rounds of Grain-128 to mix up the initial state. 
* The rounds can be performed 32 at a time: 32 * 8 = 256 */ for (round = 0; round < 8; ++round) { - uint32_t y = grain128_preoutput(state); + uint32_t y = grain128_preoutput_setup(state); grain128_core(state, y, y); } @@ -241,6 +273,7 @@ void grain128_setup */ static void grain128_next_keystream(grain128_state_t *state) { +#if !defined(GRAIN128_ASM_HELPERS) unsigned posn; for (posn = 0; posn < sizeof(state->ks); posn += 4) { /* Get the next word of pre-output and run the Grain-128 core */ @@ -264,6 +297,16 @@ static void grain128_next_keystream(grain128_state_t *state) bit_permute_step_simple(x, 0x00ff00ff, 8); be_store_word32(state->ks + posn, x); } +#else + /* Generate the data and then perform the interleaving */ + unsigned posn; + for (posn = 0; posn < sizeof(state->ks); posn += 4) { + uint32_t x = grain128_preoutput(state); + le_store_word32(state->ks + posn, x); + grain128_core(state, 0, 0); + } + grain128_interleave(state->ks); +#endif } void grain128_authenticate @@ -394,6 +437,8 @@ void grain128_decrypt state->posn = posn; } +#if !defined(__AVR__) + void grain128_compute_tag(grain128_state_t *state) { uint64_t x; @@ -409,3 +454,5 @@ void grain128_compute_tag(grain128_state_t *state) bit_permute_step_simple(x, 0x0f0f0f0f0f0f0f0fULL, 4); be_store_word64(state->ks, x); } + +#endif /* !__AVR__ */ diff --git a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.h b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.h index 4c3a6e4..ba1d260 100644 --- a/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.h +++ b/grain-128aead/Implementations/crypto_aead/grain128aead/rhys/internal-grain128.h @@ -28,6 +28,8 @@ /** * \file internal-grain128.h * \brief Internal implementation of the Grain-128 stream cipher. + * + * References: https://grain-128aead.github.io/ */ #ifdef __cplusplus @@ -52,6 +54,25 @@ typedef struct } grain128_state_t; /** + * \brief Performs 32 rounds of Grain-128 in parallel. + * + * \param state Grain-128 state. + * \param x 32 bits of input to be incorporated into the LFSR state, or zero. + * \param x2 Another 32 bits to be incorporated into the NFSR state, or zero. + */ +void grain128_core + (grain128_state_t *state, uint32_t x, uint32_t x2); + +/** + * \brief Generates 32 bits of pre-output data. + * + * \param state Grain-128 state. + * + * \return The generated 32 bits of pre-output data. + */ +uint32_t grain128_preoutput(const grain128_state_t *state); + +/** * \brief Sets up the initial Grain-128 state with the key and nonce. * * \param state Grain-128 state to be initialized. 
diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/encrypt.c b/hyena/Implementations/crypto_aead/hyenav1/rhys/encrypt.c index db50784..9db7825 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/encrypt.c +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/encrypt.c @@ -9,7 +9,7 @@ int crypto_aead_encrypt const unsigned char *npub, const unsigned char *k) { - return hyena_aead_encrypt + return hyena_v1_aead_encrypt (c, clen, m, mlen, ad, adlen, nsec, npub, k); } @@ -21,6 +21,6 @@ int crypto_aead_decrypt const unsigned char *npub, const unsigned char *k) { - return hyena_aead_decrypt + return hyena_v1_aead_decrypt (m, mlen, nsec, c, clen, ad, adlen, npub, k); } diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.c b/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.c index 3af79fa..eaafb36 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.c +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.c @@ -25,14 +25,24 @@ #include "internal-util.h" #include -aead_cipher_t const hyena_cipher = { - "HYENA", +aead_cipher_t const hyena_v1_cipher = { + "HYENA-v1", HYENA_KEY_SIZE, HYENA_NONCE_SIZE, HYENA_TAG_SIZE, AEAD_FLAG_LITTLE_ENDIAN, - hyena_aead_encrypt, - hyena_aead_decrypt + hyena_v1_aead_encrypt, + hyena_v1_aead_decrypt +}; + +aead_cipher_t const hyena_v2_cipher = { + "HYENA-v2", + HYENA_KEY_SIZE, + HYENA_NONCE_SIZE, + HYENA_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + hyena_v2_aead_encrypt, + hyena_v2_aead_decrypt }; /** @@ -52,7 +62,24 @@ static void hyena_double_delta(unsigned char D[8]) } /** - * \brief Process the associated data for HYENA. + * \brief Triples a delta value in the F(2^64) field. + * + * \param D The delta value to be tripled. + * + * D' = D ^ (D << 1) if the top-most bit is 0, or D' = D ^ (D << 1) ^ 0x1B + * otherwise. + */ +static void hyena_triple_delta(unsigned char D[8]) +{ + unsigned index; + unsigned char mask = (unsigned char)(((signed char)(D[0])) >> 7); + for (index = 0; index < 7; ++index) + D[index] ^= (D[index] << 1) | (D[index + 1] >> 7); + D[7] ^= (D[7] << 1) ^ (mask & 0x1B); +} + +/** + * \brief Process the associated data for HYENA-v1. * * \param ks Key schedule for the GIFT-128 cipher. * \param Y Internal hash state of HYENA. @@ -60,7 +87,7 @@ static void hyena_double_delta(unsigned char D[8]) * \param ad Points to the associated data. * \param adlen Length of the associated data in bytes. 
*/ -static void hyena_process_ad +static void hyena_v1_process_ad (const gift128n_key_schedule_t *ks, unsigned char Y[16], unsigned char D[8], const unsigned char *ad, unsigned long long adlen) @@ -97,7 +124,7 @@ static void hyena_process_ad } } -int hyena_aead_encrypt +int hyena_v1_aead_encrypt (unsigned char *c, unsigned long long *clen, const unsigned char *m, unsigned long long mlen, const unsigned char *ad, unsigned long long adlen, @@ -116,8 +143,7 @@ int hyena_aead_encrypt *clen = mlen + HYENA_TAG_SIZE; /* Set up the key schedule and use it to encrypt the nonce */ - if (!gift128n_init(&ks, k, HYENA_KEY_SIZE)) - return -1; + gift128n_init(&ks, k); Y[0] = 0; if (adlen == 0) Y[0] |= 0x01; @@ -131,7 +157,7 @@ int hyena_aead_encrypt memcpy(D, Y + 8, 8); /* Process the associated data */ - hyena_process_ad(&ks, Y, D, ad, adlen); + hyena_v1_process_ad(&ks, Y, D, ad, adlen); /* Encrypt the plaintext to produce the ciphertext */ if (mlen > 0) { @@ -185,7 +211,7 @@ int hyena_aead_encrypt return 0; } -int hyena_aead_decrypt +int hyena_v1_aead_decrypt (unsigned char *m, unsigned long long *mlen, unsigned char *nsec, const unsigned char *c, unsigned long long clen, @@ -207,8 +233,7 @@ int hyena_aead_decrypt *mlen = clen - HYENA_TAG_SIZE; /* Set up the key schedule and use it to encrypt the nonce */ - if (!gift128n_init(&ks, k, HYENA_KEY_SIZE)) - return -1; + gift128n_init(&ks, k); Y[0] = 0; if (adlen == 0) Y[0] |= 0x01; @@ -222,7 +247,7 @@ int hyena_aead_decrypt memcpy(D, Y + 8, 8); /* Process the associated data */ - hyena_process_ad(&ks, Y, D, ad, adlen); + hyena_v1_process_ad(&ks, Y, D, ad, adlen); /* Decrypt the ciphertext to produce the plaintext */ clen -= HYENA_TAG_SIZE; @@ -281,3 +306,227 @@ int hyena_aead_decrypt gift128n_encrypt(&ks, Y, Y); return aead_check_tag(mtemp, *mlen, Y, c, HYENA_TAG_SIZE); } + +/** + * \brief Process the associated data for HYENA-v2. + * + * \param ks Key schedule for the GIFT-128 cipher. + * \param Y Internal hash state of HYENA. + * \param D Internal hash state of HYENA. + * \param ad Points to the associated data. + * \param adlen Length of the associated data in bytes. 
+ */ +static void hyena_v2_process_ad + (const gift128n_key_schedule_t *ks, unsigned char Y[16], + unsigned char D[8], const unsigned char *ad, + unsigned long long adlen) +{ + unsigned char feedback[16]; + while (adlen > 16) { + hyena_double_delta(D); + memcpy(feedback, ad, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + gift128n_encrypt(ks, Y, Y); + ad += 16; + adlen -= 16; + } + if (adlen == 16) { + hyena_triple_delta(D); + memcpy(feedback, ad, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + } else { + unsigned temp = (unsigned)adlen; + hyena_triple_delta(D); + hyena_triple_delta(D); + memcpy(feedback, ad, temp); + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + if (temp > 8) + lw_xor_block(feedback + 8, Y + 8, temp - 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + } +} + +int hyena_v2_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + gift128n_key_schedule_t ks; + unsigned char Y[16]; + unsigned char D[8]; + unsigned char feedback[16]; + unsigned index; + (void)nsec; + + /* Set the length of the returned ciphertext */ + *clen = mlen + HYENA_TAG_SIZE; + + /* Set up the key schedule and use it to encrypt the nonce */ + gift128n_init(&ks, k); + Y[0] = 0; + if (adlen == 0) + Y[0] |= 0x01; + if (adlen == 0 && mlen == 0) + Y[0] |= 0x02; + Y[1] = 0; + Y[2] = 0; + Y[3] = 0; + memcpy(Y + 4, npub, HYENA_NONCE_SIZE); + gift128n_encrypt(&ks, Y, Y); + memcpy(D, Y + 8, 8); + + /* Process the associated data */ + hyena_v2_process_ad(&ks, Y, D, ad, adlen); + + /* Encrypt the plaintext to produce the ciphertext */ + if (mlen > 0) { + while (mlen > 16) { + gift128n_encrypt(&ks, Y, Y); + hyena_double_delta(D); + memcpy(feedback, m, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, 16); + lw_xor_block(Y, feedback, 16); + c += 16; + m += 16; + mlen -= 16; + } + gift128n_encrypt(&ks, Y, Y); + if (mlen == 16) { + hyena_triple_delta(D); + memcpy(feedback, m, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, 16); + lw_xor_block(Y, feedback, 16); + c += 16; + } else { + unsigned temp = (unsigned)mlen; + hyena_triple_delta(D); + hyena_triple_delta(D); + memcpy(feedback, m, temp); + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + if (temp > 8) + lw_xor_block(feedback + 8, Y + 8, temp - 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, temp); + lw_xor_block(Y, feedback, 16); + c += temp; + } + } + + /* Swap the two halves of Y and generate the authentication tag */ + for (index = 0; index < 8; ++index) { + unsigned char temp1 = Y[index]; + unsigned char temp2 = Y[index + 8]; + Y[index] = temp2; + Y[index + 8] = temp1; + } + gift128n_encrypt(&ks, c, Y); + return 0; +} + +int hyena_v2_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + gift128n_key_schedule_t ks; + unsigned char Y[16]; + unsigned char D[8]; + unsigned char feedback[16]; + unsigned char *mtemp; + unsigned index; + 
(void)nsec; + + /* Validate the ciphertext length and set the return "mlen" value */ + if (clen < HYENA_TAG_SIZE) + return -1; + *mlen = clen - HYENA_TAG_SIZE; + + /* Set up the key schedule and use it to encrypt the nonce */ + gift128n_init(&ks, k); + Y[0] = 0; + if (adlen == 0) + Y[0] |= 0x01; + if (adlen == 0 && clen == HYENA_TAG_SIZE) + Y[0] |= 0x02; + Y[1] = 0; + Y[2] = 0; + Y[3] = 0; + memcpy(Y + 4, npub, HYENA_NONCE_SIZE); + gift128n_encrypt(&ks, Y, Y); + memcpy(D, Y + 8, 8); + + /* Process the associated data */ + hyena_v2_process_ad(&ks, Y, D, ad, adlen); + + /* Decrypt the ciphertext to produce the plaintext */ + clen -= HYENA_TAG_SIZE; + mtemp = m; + if (clen > 0) { + while (clen > 16) { + gift128n_encrypt(&ks, Y, Y); + hyena_double_delta(D); + memcpy(feedback + 8, c + 8, 8); + lw_xor_block_2_src(m, c, Y, 16); + memcpy(feedback, m, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += 16; + m += 16; + clen -= 16; + } + gift128n_encrypt(&ks, Y, Y); + if (clen == 16) { + hyena_triple_delta(D); + memcpy(feedback + 8, c + 8, 8); + lw_xor_block_2_src(m, c, Y, 16); + memcpy(feedback, m, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += 16; + } else { + unsigned temp = (unsigned)clen; + hyena_triple_delta(D); + hyena_triple_delta(D); + if (temp > 8) { + memcpy(feedback + 8, c + 8, temp - 8); + lw_xor_block_2_src(m, c, Y, temp); + memcpy(feedback, m, 8); + } else { + lw_xor_block_2_src(m, c, Y, temp); + memcpy(feedback, m, temp); + } + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += temp; + } + } + + /* Swap the two halves of Y and check the authentication tag */ + for (index = 0; index < 8; ++index) { + unsigned char temp1 = Y[index]; + unsigned char temp2 = Y[index + 8]; + Y[index] = temp2; + Y[index + 8] = temp1; + } + gift128n_encrypt(&ks, Y, Y); + return aead_check_tag(mtemp, *mlen, Y, c, HYENA_TAG_SIZE); +} diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.h b/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.h index ee9bb9c..79585d5 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.h +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/hyena.h @@ -33,6 +33,12 @@ * GIFT-128 block cipher. The algorithm has a 128-bit key, a 96-bit nonce, * and a 128-bit authentication tag. * + * This library implements both the v1 and v2 versions of HYENA from the + * authors. The v1 version was submitted to the second round of the + * NIST Lightweight Cryptography Competition but was later found to have a + * forgery attack. The authors fixed this with v2 but it was too late to + * submit the update for the second round. + * * References: https://www.isical.ac.in/~lightweight/hyena/ */ @@ -56,12 +62,76 @@ extern "C" { #define HYENA_NONCE_SIZE 12 /** - * \brief Meta-information block for the HYENA cipher. + * \brief Meta-information block for the HYENA-v1 cipher. + */ +extern aead_cipher_t const hyena_v1_cipher; + +/** + * \brief Meta-information block for the HYENA-v2 cipher. + */ +extern aead_cipher_t const hyena_v2_cipher; + +/** + * \brief Encrypts and authenticates a packet with HYENA-v1. + * + * \param c Buffer to receive the output. + * \param clen On exit, set to the length of the output which includes + * the ciphertext and the 16 byte authentication tag. + * \param m Buffer that contains the plaintext message to encrypt. + * \param mlen Length of the plaintext message in bytes. 
+ * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param nsec Secret nonce - not used by this algorithm. + * \param npub Points to the public nonce for the packet which must + * be 12 bytes in length. + * \param k Points to the 16 bytes of the key to use to encrypt the packet. + * + * \return 0 on success, or a negative value if there was an error in + * the parameters. + * + * \sa hyena_aead_decrypt() */ -extern aead_cipher_t const hyena_cipher; +int hyena_v1_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k); + +/** + * \brief Decrypts and authenticates a packet with HYENA-v1. + * + * \param m Buffer to receive the plaintext message on output. + * \param mlen Receives the length of the plaintext message on output. + * \param nsec Secret nonce - not used by this algorithm. + * \param c Buffer that contains the ciphertext and authentication + * tag to decrypt. + * \param clen Length of the input data in bytes, which includes the + * ciphertext and the 16 byte authentication tag. + * \param ad Buffer that contains associated data to authenticate + * along with the packet but which does not need to be encrypted. + * \param adlen Length of the associated data in bytes. + * \param npub Points to the public nonce for the packet which must + * be 12 bytes in length. + * \param k Points to the 16 bytes of the key to use to decrypt the packet. + * + * \return 0 on success, -1 if the authentication tag was incorrect, + * or some other negative number if there was an error in the parameters. + * + * \sa hyena_aead_encrypt() + */ +int hyena_v1_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k); /** - * \brief Encrypts and authenticates a packet with HYENA. + * \brief Encrypts and authenticates a packet with HYENA-v2. * * \param c Buffer to receive the output. * \param clen On exit, set to the length of the output which includes @@ -81,7 +151,7 @@ extern aead_cipher_t const hyena_cipher; * * \sa hyena_aead_decrypt() */ -int hyena_aead_encrypt +int hyena_v2_aead_encrypt (unsigned char *c, unsigned long long *clen, const unsigned char *m, unsigned long long mlen, const unsigned char *ad, unsigned long long adlen, @@ -90,7 +160,7 @@ int hyena_aead_encrypt const unsigned char *k); /** - * \brief Decrypts and authenticates a packet with HYENA. + * \brief Decrypts and authenticates a packet with HYENA-v2. * * \param m Buffer to receive the plaintext message on output. * \param mlen Receives the length of the plaintext message on output. 
@@ -111,7 +181,7 @@ int hyena_aead_encrypt * * \sa hyena_aead_encrypt() */ -int hyena_aead_decrypt +int hyena_v2_aead_decrypt (unsigned char *m, unsigned long long *mlen, unsigned char *nsec, const unsigned char *c, unsigned long long clen, diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.c b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.c index 681dbc8..c6ac5ec 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.c +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.c @@ -23,8 +23,12 @@ #include "internal-gift128.h" #include "internal-util.h" +#if !GIFT128_VARIANT_ASM + +#if GIFT128_VARIANT != GIFT128_VARIANT_TINY + /* Round constants for GIFT-128 in the fixsliced representation */ -static uint32_t const GIFT128_RC[40] = { +static uint32_t const GIFT128_RC_fixsliced[40] = { 0x10000008, 0x80018000, 0x54000002, 0x01010181, 0x8000001f, 0x10888880, 0x6001e000, 0x51500002, 0x03030180, 0x8000002f, 0x10088880, 0x60016000, 0x41500002, 0x03030080, 0x80000027, 0x10008880, 0x4001e000, 0x11500002, @@ -34,6 +38,246 @@ static uint32_t const GIFT128_RC[40] = { 0xc001a000, 0x14500002, 0x01020181, 0x8000001a }; +#endif + +#if GIFT128_VARIANT != GIFT128_VARIANT_FULL + +/* Round constants for GIFT-128 in the bitsliced representation */ +static uint8_t const GIFT128_RC[40] = { + 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3E, 0x3D, 0x3B, + 0x37, 0x2F, 0x1E, 0x3C, 0x39, 0x33, 0x27, 0x0E, + 0x1D, 0x3A, 0x35, 0x2B, 0x16, 0x2C, 0x18, 0x30, + 0x21, 0x02, 0x05, 0x0B, 0x17, 0x2E, 0x1C, 0x38, + 0x31, 0x23, 0x06, 0x0D, 0x1B, 0x36, 0x2D, 0x1A +}; + +#endif + +/* http://programming.sirrida.de/perm_fn.html#bit_permute_step */ +#define bit_permute_step(_y, mask, shift) \ + do { \ + uint32_t y = (_y); \ + uint32_t t = ((y >> (shift)) ^ y) & (mask); \ + (_y) = (y ^ t) ^ (t << (shift)); \ + } while (0) + +/* + * The permutation below was generated by the online permutation generator at + * "http://programming.sirrida.de/calcperm.php". + * + * All of the permutations are essentially the same, except that each is + * rotated by 8 bits with respect to the next: + * + * P0: 0 24 16 8 1 25 17 9 2 26 18 10 3 27 19 11 4 28 20 12 5 29 21 13 6 30 22 14 7 31 23 15 + * P1: 8 0 24 16 9 1 25 17 10 2 26 18 11 3 27 19 12 4 28 20 13 5 29 21 14 6 30 22 15 7 31 23 + * P2: 16 8 0 24 17 9 1 25 18 10 2 26 19 11 3 27 20 12 4 28 21 13 5 29 22 14 6 30 23 15 7 31 + * P3: 24 16 8 0 25 17 9 1 26 18 10 2 27 19 11 3 28 20 12 4 29 21 13 5 30 22 14 6 31 23 15 7 + * + * The most efficient permutation from the online generator was P3, so we + * perform it as the core of the others, and then perform a final rotation. + * + * It is possible to do slightly better than "P3 then rotate" on desktop and + * server architectures for the other permutations. But the advantage isn't + * as evident on embedded platforms so we keep things simple.
+ */ +#define PERM3_INNER(x) \ + do { \ + bit_permute_step(x, 0x0a0a0a0a, 3); \ + bit_permute_step(x, 0x00cc00cc, 6); \ + bit_permute_step(x, 0x0000f0f0, 12); \ + bit_permute_step(x, 0x000000ff, 24); \ + } while (0) +#define PERM0(x) \ + do { \ + uint32_t _x = (x); \ + PERM3_INNER(_x); \ + (x) = leftRotate8(_x); \ + } while (0) +#define PERM1(x) \ + do { \ + uint32_t _x = (x); \ + PERM3_INNER(_x); \ + (x) = leftRotate16(_x); \ + } while (0) +#define PERM2(x) \ + do { \ + uint32_t _x = (x); \ + PERM3_INNER(_x); \ + (x) = leftRotate24(_x); \ + } while (0) +#define PERM3(x) \ + do { \ + uint32_t _x = (x); \ + PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) + +#define INV_PERM3_INNER(x) \ + do { \ + bit_permute_step(x, 0x00550055, 9); \ + bit_permute_step(x, 0x00003333, 18); \ + bit_permute_step(x, 0x000f000f, 12); \ + bit_permute_step(x, 0x000000ff, 24); \ + } while (0) +#define INV_PERM0(x) \ + do { \ + uint32_t _x = rightRotate8(x); \ + INV_PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) +#define INV_PERM1(x) \ + do { \ + uint32_t _x = rightRotate16(x); \ + INV_PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) +#define INV_PERM2(x) \ + do { \ + uint32_t _x = rightRotate24(x); \ + INV_PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) +#define INV_PERM3(x) \ + do { \ + uint32_t _x = (x); \ + INV_PERM3_INNER(_x); \ + (x) = _x; \ + } while (0) + +/** + * \brief Converts the GIFT-128 nibble-based representation into word-based. + * + * \param output Output buffer to write the word-based version to. + * \param input Input buffer to read the nibble-based version from. + * + * The \a input and \a output buffers can be the same buffer. + */ +static void gift128n_to_words + (unsigned char *output, const unsigned char *input) +{ + uint32_t s0, s1, s2, s3; + + /* Load the input buffer into 32-bit words. We use the nibble order + * from the HYENA submission to NIST which is byte-reversed with respect + * to the nibble order of the original GIFT-128 paper. Nibble zero is in + * the first byte instead of the last, which means little-endian order. */ + s0 = le_load_word32(input + 12); + s1 = le_load_word32(input + 8); + s2 = le_load_word32(input + 4); + s3 = le_load_word32(input); + + /* Rearrange the bits so that bits 0..3 of each nibble are + * scattered to bytes 0..3 of each word. The permutation is: + * + * 0 8 16 24 1 9 17 25 2 10 18 26 3 11 19 27 4 12 20 28 5 13 21 29 6 14 22 30 7 15 23 31 + * + * Generated with "http://programming.sirrida.de/calcperm.php". + */ + #define PERM_WORDS(_x) \ + do { \ + uint32_t x = (_x); \ + bit_permute_step(x, 0x0a0a0a0a, 3); \ + bit_permute_step(x, 0x00cc00cc, 6); \ + bit_permute_step(x, 0x0000f0f0, 12); \ + bit_permute_step(x, 0x0000ff00, 8); \ + (_x) = x; \ + } while (0) + PERM_WORDS(s0); + PERM_WORDS(s1); + PERM_WORDS(s2); + PERM_WORDS(s3); + + /* Rearrange the bytes and write them to the output buffer */ + output[0] = (uint8_t)s0; + output[1] = (uint8_t)s1; + output[2] = (uint8_t)s2; + output[3] = (uint8_t)s3; + output[4] = (uint8_t)(s0 >> 8); + output[5] = (uint8_t)(s1 >> 8); + output[6] = (uint8_t)(s2 >> 8); + output[7] = (uint8_t)(s3 >> 8); + output[8] = (uint8_t)(s0 >> 16); + output[9] = (uint8_t)(s1 >> 16); + output[10] = (uint8_t)(s2 >> 16); + output[11] = (uint8_t)(s3 >> 16); + output[12] = (uint8_t)(s0 >> 24); + output[13] = (uint8_t)(s1 >> 24); + output[14] = (uint8_t)(s2 >> 24); + output[15] = (uint8_t)(s3 >> 24); +} + +/** + * \brief Converts the GIFT-128 word-based representation into nibble-based. 
+ * + * \param output Output buffer to write the nibble-based version to. + * \param input Input buffer to read the word-based version from. + */ +static void gift128n_to_nibbles + (unsigned char *output, const unsigned char *input) +{ + uint32_t s0, s1, s2, s3; + + /* Load the input bytes and rearrange them so that s0 contains the + * most significant nibbles and s3 contains the least significant */ + s0 = (((uint32_t)(input[12])) << 24) | + (((uint32_t)(input[8])) << 16) | + (((uint32_t)(input[4])) << 8) | + ((uint32_t)(input[0])); + s1 = (((uint32_t)(input[13])) << 24) | + (((uint32_t)(input[9])) << 16) | + (((uint32_t)(input[5])) << 8) | + ((uint32_t)(input[1])); + s2 = (((uint32_t)(input[14])) << 24) | + (((uint32_t)(input[10])) << 16) | + (((uint32_t)(input[6])) << 8) | + ((uint32_t)(input[2])); + s3 = (((uint32_t)(input[15])) << 24) | + (((uint32_t)(input[11])) << 16) | + (((uint32_t)(input[7])) << 8) | + ((uint32_t)(input[3])); + + /* Apply the inverse of PERM_WORDS() from the function above */ + #define INV_PERM_WORDS(_x) \ + do { \ + uint32_t x = (_x); \ + bit_permute_step(x, 0x00aa00aa, 7); \ + bit_permute_step(x, 0x0000cccc, 14); \ + bit_permute_step(x, 0x00f000f0, 4); \ + bit_permute_step(x, 0x0000ff00, 8); \ + (_x) = x; \ + } while (0) + INV_PERM_WORDS(s0); + INV_PERM_WORDS(s1); + INV_PERM_WORDS(s2); + INV_PERM_WORDS(s3); + + /* Store the result into the output buffer as 32-bit words */ + le_store_word32(output + 12, s0); + le_store_word32(output + 8, s1); + le_store_word32(output + 4, s2); + le_store_word32(output, s3); +} + +void gift128n_encrypt + (const gift128n_key_schedule_t *ks, unsigned char *output, + const unsigned char *input) +{ + gift128n_to_words(output, input); + gift128b_encrypt(ks, output, output); + gift128n_to_nibbles(output, output); +} + +void gift128n_decrypt + (const gift128n_key_schedule_t *ks, unsigned char *output, + const unsigned char *input) +{ + gift128n_to_words(output, input); + gift128b_decrypt(ks, output, output); + gift128n_to_nibbles(output, output); +} + +#if GIFT128_VARIANT != GIFT128_VARIANT_TINY + /** * \brief Swaps bits within two words. 
* @@ -202,21 +446,27 @@ static void gift128b_compute_round_keys /* Keys 8, 9, 18, and 19 do not need any adjustment */ } +#if GIFT128_VARIANT == GIFT128_VARIANT_FULL /* Derive the fixsliced keys for the remaining rounds 11..40 */ for (index = 20; index < 80; index += 10) { gift128b_derive_keys(ks->k + index, ks->k + index - 20); } +#endif } -int gift128b_init - (gift128b_key_schedule_t *ks, const unsigned char *key, size_t key_len) +void gift128b_init(gift128b_key_schedule_t *ks, const unsigned char *key) { - if (!ks || !key || key_len != 16) - return 0; gift128b_compute_round_keys (ks, be_load_word32(key), be_load_word32(key + 4), be_load_word32(key + 8), be_load_word32(key + 12)); - return 1; +} + +void gift128n_init(gift128n_key_schedule_t *ks, const unsigned char *key) +{ + /* Use the little-endian key byte order from the HYENA submission */ + gift128b_compute_round_keys + (ks, le_load_word32(key + 12), le_load_word32(key + 8), + le_load_word32(key + 4), le_load_word32(key)); } /** @@ -521,11 +771,37 @@ int gift128b_init gift128b_inv_sbox(s3, s1, s2, s0); \ } while (0) +#else /* GIFT128_VARIANT_TINY */ + +void gift128b_init(gift128b_key_schedule_t *ks, const unsigned char *key) +{ + /* Mirror the fixslicing word order of 3, 1, 2, 0 */ + ks->k[0] = be_load_word32(key + 12); + ks->k[1] = be_load_word32(key + 4); + ks->k[2] = be_load_word32(key + 8); + ks->k[3] = be_load_word32(key); +} + +void gift128n_init(gift128n_key_schedule_t *ks, const unsigned char *key) +{ + /* Use the little-endian key byte order from the HYENA submission + * and mirror the fixslicing word order of 3, 1, 2, 0 */ + ks->k[0] = le_load_word32(key); + ks->k[1] = le_load_word32(key + 8); + ks->k[2] = le_load_word32(key + 4); + ks->k[3] = le_load_word32(key + 12); +} + +#endif /* GIFT128_VARIANT_TINY */ + +#if GIFT128_VARIANT == GIFT128_VARIANT_SMALL + void gift128b_encrypt (const gift128b_key_schedule_t *ks, unsigned char *output, const unsigned char *input) { uint32_t s0, s1, s2, s3; + uint32_t k[20]; /* Copy the plaintext into the state buffer and convert from big endian */ s0 = be_load_word32(input); @@ -534,14 +810,20 @@ void gift128b_encrypt s3 = be_load_word32(input + 12); /* Perform all 40 rounds five at a time using the fixsliced method */ - gift128b_encrypt_5_rounds(ks->k, GIFT128_RC); - gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_derive_keys(k, ks->k); + gift128b_derive_keys(k + 10, ks->k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 10); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 15); + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 20); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 25); + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 30); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 35); /* Pack the state into the ciphertext buffer in big endian */ be_store_word32(output, s0); @@ -555,6 +837,7 @@ void 
gift128b_encrypt_preloaded const uint32_t input[4]) { uint32_t s0, s1, s2, s3; + uint32_t k[20]; /* Copy the plaintext into local variables */ s0 = input[0]; @@ -563,14 +846,20 @@ void gift128b_encrypt_preloaded s3 = input[3]; /* Perform all 40 rounds five at a time using the fixsliced method */ - gift128b_encrypt_5_rounds(ks->k, GIFT128_RC); - gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_derive_keys(k, ks->k); + gift128b_derive_keys(k + 10, ks->k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 10); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 15); + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 20); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 25); + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 30); + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 35); /* Pack the state into the ciphertext buffer */ output[0] = s0; @@ -579,7 +868,55 @@ void gift128b_encrypt_preloaded output[3] = s3; } -void gift128b_decrypt +void gift128t_encrypt + (const gift128n_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, uint32_t tweak) +{ + uint32_t s0, s1, s2, s3; + uint32_t k[20]; + + /* Copy the plaintext into the state buffer and convert from nibbles */ + gift128n_to_words(output, input); + s0 = be_load_word32(output); + s1 = be_load_word32(output + 4); + s2 = be_load_word32(output + 8); + s3 = be_load_word32(output + 12); + + /* Perform all 40 rounds five at a time using the fixsliced method. 
+ * Every 5 rounds except the last we add the tweak value to the state */ + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + s0 ^= tweak; + gift128b_derive_keys(k, ks->k); + gift128b_derive_keys(k + 10, ks->k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 10); + s0 ^= tweak; + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 15); + s0 ^= tweak; + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 20); + s0 ^= tweak; + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 25); + s0 ^= tweak; + gift128b_derive_keys(k, k); + gift128b_derive_keys(k + 10, k + 10); + gift128b_encrypt_5_rounds(k, GIFT128_RC_fixsliced + 30); + s0 ^= tweak; + gift128b_encrypt_5_rounds(k + 10, GIFT128_RC_fixsliced + 35); + + /* Pack the state into the ciphertext buffer in nibble form */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); + gift128n_to_nibbles(output, output); +} + +#elif GIFT128_VARIANT == GIFT128_VARIANT_FULL + +void gift128b_encrypt (const gift128b_key_schedule_t *ks, unsigned char *output, const unsigned char *input) { @@ -592,14 +929,14 @@ void gift128b_decrypt s3 = be_load_word32(input + 12); /* Perform all 40 rounds five at a time using the fixsliced method */ - gift128b_decrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); - gift128b_decrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - gift128b_decrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - gift128b_decrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - gift128b_decrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - gift128b_decrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - gift128b_decrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - gift128b_decrypt_5_rounds(ks->k, GIFT128_RC); + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC_fixsliced + 15); + gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); /* Pack the state into the ciphertext buffer in big endian */ be_store_word32(output, s0); @@ -608,173 +945,308 @@ void gift128b_decrypt be_store_word32(output + 12, s3); } -int gift128n_init - (gift128n_key_schedule_t *ks, const unsigned char *key, size_t key_len) +void gift128b_encrypt_preloaded + (const gift128b_key_schedule_t *ks, uint32_t output[4], + const uint32_t input[4]) { - /* Use the little-endian key byte order from the HYENA submission */ - if (!ks || !key || key_len != 16) - return 0; - gift128b_compute_round_keys - (ks, le_load_word32(key + 12), le_load_word32(key + 8), - le_load_word32(key + 4), le_load_word32(key)); - return 1; + uint32_t s0, s1, s2, s3; + + /* Copy the plaintext into local variables */ + s0 = input[0]; + s1 = input[1]; + s2 = input[2]; + s3 = input[3]; + + /* Perform all 40 rounds five at a time using the fixsliced method */ + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + gift128b_encrypt_5_rounds(ks->k + 30, 
GIFT128_RC_fixsliced + 15); + gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); + + /* Pack the state into the ciphertext buffer */ + output[0] = s0; + output[1] = s1; + output[2] = s2; + output[3] = s3; } -/* http://programming.sirrida.de/perm_fn.html#bit_permute_step */ -#define bit_permute_step(_y, mask, shift) \ - do { \ - uint32_t y = (_y); \ - uint32_t t = ((y >> (shift)) ^ y) & (mask); \ - (_y) = (y ^ t) ^ (t << (shift)); \ - } while (0) +void gift128t_encrypt + (const gift128n_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, uint32_t tweak) +{ + uint32_t s0, s1, s2, s3; -/** - * \brief Converts the GIFT-128 nibble-based representation into word-based. - * - * \param output Output buffer to write the word-based version to. - * \param input Input buffer to read the nibble-based version from. - * - * The \a input and \a output buffers can be the same buffer. - */ -static void gift128n_to_words - (unsigned char *output, const unsigned char *input) + /* Copy the plaintext into the state buffer and convert from nibbles */ + gift128n_to_words(output, input); + s0 = be_load_word32(output); + s1 = be_load_word32(output + 4); + s2 = be_load_word32(output + 8); + s3 = be_load_word32(output + 12); + + /* Perform all 40 rounds five at a time using the fixsliced method. + * Every 5 rounds except the last we add the tweak value to the state */ + gift128b_encrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC_fixsliced + 15); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + s0 ^= tweak; + gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); + + /* Pack the state into the ciphertext buffer in nibble form */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); + gift128n_to_nibbles(output, output); +} + +#else /* GIFT128_VARIANT_TINY */ + +void gift128b_encrypt + (const gift128b_key_schedule_t *ks, unsigned char *output, + const unsigned char *input) { uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; - /* Load the input buffer into 32-bit words. We use the nibble order - * from the HYENA submission to NIST which is byte-reversed with respect - * to the nibble order of the original GIFT-128 paper. Nibble zero is in - * the first byte instead of the last, which means little-endian order. */ - s0 = le_load_word32(input + 12); - s1 = le_load_word32(input + 8); - s2 = le_load_word32(input + 4); - s3 = le_load_word32(input); + /* Copy the plaintext into the state buffer and convert from big endian */ + s0 = be_load_word32(input); + s1 = be_load_word32(input + 4); + s2 = be_load_word32(input + 8); + s3 = be_load_word32(input + 12); - /* Rearrange the bits so that bits 0..3 of each nibble are - * scattered to bytes 0..3 of each word. 
The permutation is: - * - * 0 8 16 24 1 9 17 25 2 10 18 26 3 11 19 27 4 12 20 28 5 13 21 29 6 14 22 30 7 15 23 31 - * - * Generated with "http://programming.sirrida.de/calcperm.php". - */ - #define PERM_WORDS(_x) \ - do { \ - uint32_t x = (_x); \ - bit_permute_step(x, 0x0a0a0a0a, 3); \ - bit_permute_step(x, 0x00cc00cc, 6); \ - bit_permute_step(x, 0x0000f0f0, 12); \ - bit_permute_step(x, 0x0000ff00, 8); \ - (_x) = x; \ - } while (0) - PERM_WORDS(s0); - PERM_WORDS(s1); - PERM_WORDS(s2); - PERM_WORDS(s3); + /* The key schedule is initialized with the key itself */ + w0 = ks->k[3]; + w1 = ks->k[1]; + w2 = ks->k[2]; + w3 = ks->k[0]; + + /* Perform all 40 rounds */ + for (round = 0; round < 40; ++round) { + /* SubCells - apply the S-box */ + s1 ^= s0 & s2; + s0 ^= s1 & s3; + s2 ^= s0 | s1; + s3 ^= s2; + s1 ^= s3; + s3 ^= 0xFFFFFFFFU; + s2 ^= s0 & s1; + temp = s0; + s0 = s3; + s3 = temp; + + /* PermBits - apply the 128-bit permutation */ + PERM0(s0); + PERM1(s1); + PERM2(s2); + PERM3(s3); + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round]; + + /* Rotate the key schedule */ + temp = w3; + w3 = w2; + w2 = w1; + w1 = w0; + w0 = ((temp & 0xFFFC0000U) >> 2) | ((temp & 0x00030000U) << 14) | + ((temp & 0x00000FFFU) << 4) | ((temp & 0x0000F000U) >> 12); + } - /* Rearrange the bytes and write them to the output buffer */ - output[0] = (uint8_t)s0; - output[1] = (uint8_t)s1; - output[2] = (uint8_t)s2; - output[3] = (uint8_t)s3; - output[4] = (uint8_t)(s0 >> 8); - output[5] = (uint8_t)(s1 >> 8); - output[6] = (uint8_t)(s2 >> 8); - output[7] = (uint8_t)(s3 >> 8); - output[8] = (uint8_t)(s0 >> 16); - output[9] = (uint8_t)(s1 >> 16); - output[10] = (uint8_t)(s2 >> 16); - output[11] = (uint8_t)(s3 >> 16); - output[12] = (uint8_t)(s0 >> 24); - output[13] = (uint8_t)(s1 >> 24); - output[14] = (uint8_t)(s2 >> 24); - output[15] = (uint8_t)(s3 >> 24); + /* Pack the state into the ciphertext buffer in big endian */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); } -/** - * \brief Converts the GIFT-128 word-based representation into nibble-based. - * - * \param output Output buffer to write the nibble-based version to. - * \param input Input buffer to read the word-based version from. 
- */ -static void gift128n_to_nibbles - (unsigned char *output, const unsigned char *input) +void gift128b_encrypt_preloaded + (const gift128b_key_schedule_t *ks, uint32_t output[4], + const uint32_t input[4]) { uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; - /* Load the input bytes and rearrange them so that s0 contains the - * most significant nibbles and s3 contains the least significant */ - s0 = (((uint32_t)(input[12])) << 24) | - (((uint32_t)(input[8])) << 16) | - (((uint32_t)(input[4])) << 8) | - ((uint32_t)(input[0])); - s1 = (((uint32_t)(input[13])) << 24) | - (((uint32_t)(input[9])) << 16) | - (((uint32_t)(input[5])) << 8) | - ((uint32_t)(input[1])); - s2 = (((uint32_t)(input[14])) << 24) | - (((uint32_t)(input[10])) << 16) | - (((uint32_t)(input[6])) << 8) | - ((uint32_t)(input[2])); - s3 = (((uint32_t)(input[15])) << 24) | - (((uint32_t)(input[11])) << 16) | - (((uint32_t)(input[7])) << 8) | - ((uint32_t)(input[3])); + /* Copy the plaintext into the state buffer */ + s0 = input[0]; + s1 = input[1]; + s2 = input[2]; + s3 = input[3]; - /* Apply the inverse of PERM_WORDS() from the function above */ - #define INV_PERM_WORDS(_x) \ - do { \ - uint32_t x = (_x); \ - bit_permute_step(x, 0x00aa00aa, 7); \ - bit_permute_step(x, 0x0000cccc, 14); \ - bit_permute_step(x, 0x00f000f0, 4); \ - bit_permute_step(x, 0x0000ff00, 8); \ - (_x) = x; \ - } while (0) - INV_PERM_WORDS(s0); - INV_PERM_WORDS(s1); - INV_PERM_WORDS(s2); - INV_PERM_WORDS(s3); + /* The key schedule is initialized with the key itself */ + w0 = ks->k[3]; + w1 = ks->k[1]; + w2 = ks->k[2]; + w3 = ks->k[0]; + + /* Perform all 40 rounds */ + for (round = 0; round < 40; ++round) { + /* SubCells - apply the S-box */ + s1 ^= s0 & s2; + s0 ^= s1 & s3; + s2 ^= s0 | s1; + s3 ^= s2; + s1 ^= s3; + s3 ^= 0xFFFFFFFFU; + s2 ^= s0 & s1; + temp = s0; + s0 = s3; + s3 = temp; + + /* PermBits - apply the 128-bit permutation */ + PERM0(s0); + PERM1(s1); + PERM2(s2); + PERM3(s3); + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round]; + + /* Rotate the key schedule */ + temp = w3; + w3 = w2; + w2 = w1; + w1 = w0; + w0 = ((temp & 0xFFFC0000U) >> 2) | ((temp & 0x00030000U) << 14) | + ((temp & 0x00000FFFU) << 4) | ((temp & 0x0000F000U) >> 12); + } - /* Store the result into the output buffer as 32-bit words */ - le_store_word32(output + 12, s0); - le_store_word32(output + 8, s1); - le_store_word32(output + 4, s2); - le_store_word32(output, s3); + /* Pack the state into the ciphertext buffer */ + output[0] = s0; + output[1] = s1; + output[2] = s2; + output[3] = s3; } -void gift128n_encrypt +void gift128t_encrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input) + const unsigned char *input, uint32_t tweak) { + uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; + + /* Copy the plaintext into the state buffer and convert from nibbles */ gift128n_to_words(output, input); - gift128b_encrypt(ks, output, output); + s0 = be_load_word32(output); + s1 = be_load_word32(output + 4); + s2 = be_load_word32(output + 8); + s3 = be_load_word32(output + 12); + + /* The key schedule is initialized with the key itself */ + w0 = ks->k[3]; + w1 = ks->k[1]; + w2 = ks->k[2]; + w3 = ks->k[0]; + + /* Perform all 40 rounds */ + for (round = 0; round < 40; ++round) { + /* SubCells - apply the S-box */ + s1 ^= s0 & s2; + s0 ^= s1 & s3; + s2 ^= s0 | s1; + s3 ^= s2; + s1 ^= s3; + s3 ^= 
0xFFFFFFFFU; + s2 ^= s0 & s1; + temp = s0; + s0 = s3; + s3 = temp; + + /* PermBits - apply the 128-bit permutation */ + PERM0(s0); + PERM1(s1); + PERM2(s2); + PERM3(s3); + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round]; + + /* AddTweak - XOR in the tweak every 5 rounds except the last */ + if (((round + 1) % 5) == 0 && round < 39) + s0 ^= tweak; + + /* Rotate the key schedule */ + temp = w3; + w3 = w2; + w2 = w1; + w1 = w0; + w0 = ((temp & 0xFFFC0000U) >> 2) | ((temp & 0x00030000U) << 14) | + ((temp & 0x00000FFFU) << 4) | ((temp & 0x0000F000U) >> 12); + } + + /* Pack the state into the ciphertext buffer in nibble form */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); gift128n_to_nibbles(output, output); } -void gift128n_decrypt - (const gift128n_key_schedule_t *ks, unsigned char *output, +#endif /* GIFT128_VARIANT_TINY */ + +#if GIFT128_VARIANT == GIFT128_VARIANT_FULL + +void gift128b_decrypt + (const gift128b_key_schedule_t *ks, unsigned char *output, const unsigned char *input) { - gift128n_to_words(output, input); - gift128b_decrypt(ks, output, output); - gift128n_to_nibbles(output, output); -} + uint32_t s0, s1, s2, s3; -/* 4-bit tweak values expanded to 32-bit */ -static uint32_t const GIFT128_tweaks[16] = { - 0x00000000, 0xe1e1e1e1, 0xd2d2d2d2, 0x33333333, - 0xb4b4b4b4, 0x55555555, 0x66666666, 0x87878787, - 0x78787878, 0x99999999, 0xaaaaaaaa, 0x4b4b4b4b, - 0xcccccccc, 0x2d2d2d2d, 0x1e1e1e1e, 0xffffffff -}; + /* Copy the plaintext into the state buffer and convert from big endian */ + s0 = be_load_word32(input); + s1 = be_load_word32(input + 4); + s2 = be_load_word32(input + 8); + s3 = be_load_word32(input + 12); -void gift128t_encrypt + /* Perform all 40 rounds five at a time using the fixsliced method */ + gift128b_decrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); + gift128b_decrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + gift128b_decrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + gift128b_decrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + gift128b_decrypt_5_rounds(ks->k + 30, GIFT128_RC_fixsliced + 15); + gift128b_decrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + gift128b_decrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + gift128b_decrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); + + /* Pack the state into the ciphertext buffer in big endian */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); +} + +void gift128t_decrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input, unsigned char tweak) + const unsigned char *input, uint32_t tweak) { - uint32_t s0, s1, s2, s3, tword; + uint32_t s0, s1, s2, s3; - /* Copy the plaintext into the state buffer and convert from nibbles */ + /* Copy the ciphertext into the state buffer and convert from nibbles */ gift128n_to_words(output, input); s0 = be_load_word32(output); s1 = be_load_word32(output + 4); @@ -782,25 +1254,24 @@ void gift128t_encrypt s3 = be_load_word32(output + 12); /* Perform all 40 rounds five at a time using the fixsliced method. 
- * Every 5 rounds except the last we add the tweak value to the state */ - tword = GIFT128_tweaks[tweak]; - gift128b_encrypt_5_rounds(ks->k, GIFT128_RC); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - s0 ^= tword; - gift128b_encrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); + * Every 5 rounds except the first we add the tweak value to the state */ + gift128b_decrypt_5_rounds(ks->k + 70, GIFT128_RC_fixsliced + 35); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 60, GIFT128_RC_fixsliced + 30); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 50, GIFT128_RC_fixsliced + 25); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 40, GIFT128_RC_fixsliced + 20); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 30, GIFT128_RC_fixsliced + 15); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 20, GIFT128_RC_fixsliced + 10); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k + 10, GIFT128_RC_fixsliced + 5); + s0 ^= tweak; + gift128b_decrypt_5_rounds(ks->k, GIFT128_RC_fixsliced); - /* Pack the state into the ciphertext buffer in nibble form */ + /* Pack the state into the plaintext buffer in nibble form */ be_store_word32(output, s0); be_store_word32(output + 4, s1); be_store_word32(output + 8, s2); @@ -808,37 +1279,211 @@ void gift128t_encrypt gift128n_to_nibbles(output, output); } +#else /* GIFT128_VARIANT_SMALL || GIFT128_VARIANT_TINY */ + +/* The small variant uses fixslicing for encryption, but we need to change + * to bitslicing for decryption because of the difficulty of fast-forwarding + * the fixsliced key schedule to the end. So the tiny variant is used for + * decryption when the small variant is selected. Since the NIST AEAD modes + * for GIFT-128 only use the block encrypt operation, the inefficiencies + * in decryption don't matter all that much */ + +/** + * \def gift128b_load_and_forward_schedule() + * \brief Generate the decryption key at the end of the last round. + * + * To do that, we run the block operation forward to determine the + * final state of the key schedule after the last round: + * + * w0 = ks->k[0]; + * w1 = ks->k[1]; + * w2 = ks->k[2]; + * w3 = ks->k[3]; + * for (round = 0; round < 40; ++round) { + * temp = w3; + * w3 = w2; + * w2 = w1; + * w1 = w0; + * w0 = ((temp & 0xFFFC0000U) >> 2) | ((temp & 0x00030000U) << 14) | + * ((temp & 0x00000FFFU) << 4) | ((temp & 0x0000F000U) >> 12); + * } + + * We can short-cut all of the above by noticing that we don't need + * to do the word rotations. Every 4 rounds, the rotation alignment + * returns to the original position and each word has been rotated + * by applying the "2 right and 4 left" bit-rotation step to it. + * We then repeat that 10 times for the full 40 rounds. The overall + * effect is to apply a "20 right and 40 left" bit-rotation to every + * word in the key schedule. That is equivalent to "4 right and 8 left" + * on the 16-bit sub-words, since rotation counts on a 16-bit word + * reduce modulo 16 (20 mod 16 = 4 and 40 mod 16 = 8).
+ */ +#if GIFT128_VARIANT != GIFT128_VARIANT_SMALL +#define gift128b_load_and_forward_schedule() \ + do { \ + w0 = ks->k[3]; \ + w1 = ks->k[1]; \ + w2 = ks->k[2]; \ + w3 = ks->k[0]; \ + w0 = ((w0 & 0xFFF00000U) >> 4) | ((w0 & 0x000F0000U) << 12) | \ + ((w0 & 0x000000FFU) << 8) | ((w0 & 0x0000FF00U) >> 8); \ + w1 = ((w1 & 0xFFF00000U) >> 4) | ((w1 & 0x000F0000U) << 12) | \ + ((w1 & 0x000000FFU) << 8) | ((w1 & 0x0000FF00U) >> 8); \ + w2 = ((w2 & 0xFFF00000U) >> 4) | ((w2 & 0x000F0000U) << 12) | \ + ((w2 & 0x000000FFU) << 8) | ((w2 & 0x0000FF00U) >> 8); \ + w3 = ((w3 & 0xFFF00000U) >> 4) | ((w3 & 0x000F0000U) << 12) | \ + ((w3 & 0x000000FFU) << 8) | ((w3 & 0x0000FF00U) >> 8); \ + } while (0) +#else +/* The small variant needs to also undo some of the rotations that were + * done to generate the fixsliced version of the key schedule */ +#define gift128b_load_and_forward_schedule() \ + do { \ + w0 = ks->k[3]; \ + w1 = ks->k[1]; \ + w2 = ks->k[2]; \ + w3 = ks->k[0]; \ + gift128b_swap_move(w3, w3, 0x000000FFU, 24); \ + gift128b_swap_move(w3, w3, 0x00003333U, 18); \ + gift128b_swap_move(w3, w3, 0x000F000FU, 12); \ + gift128b_swap_move(w3, w3, 0x00550055U, 9); \ + gift128b_swap_move(w1, w1, 0x000000FFU, 24); \ + gift128b_swap_move(w1, w1, 0x00003333U, 18); \ + gift128b_swap_move(w1, w1, 0x000F000FU, 12); \ + gift128b_swap_move(w1, w1, 0x00550055U, 9); \ + gift128b_swap_move(w2, w2, 0x000000FFU, 24); \ + gift128b_swap_move(w2, w2, 0x000F000FU, 12); \ + gift128b_swap_move(w2, w2, 0x03030303U, 6); \ + gift128b_swap_move(w2, w2, 0x11111111U, 3); \ + gift128b_swap_move(w0, w0, 0x000000FFU, 24); \ + gift128b_swap_move(w0, w0, 0x000F000FU, 12); \ + gift128b_swap_move(w0, w0, 0x03030303U, 6); \ + gift128b_swap_move(w0, w0, 0x11111111U, 3); \ + w0 = ((w0 & 0xFFF00000U) >> 4) | ((w0 & 0x000F0000U) << 12) | \ + ((w0 & 0x000000FFU) << 8) | ((w0 & 0x0000FF00U) >> 8); \ + w1 = ((w1 & 0xFFF00000U) >> 4) | ((w1 & 0x000F0000U) << 12) | \ + ((w1 & 0x000000FFU) << 8) | ((w1 & 0x0000FF00U) >> 8); \ + w2 = ((w2 & 0xFFF00000U) >> 4) | ((w2 & 0x000F0000U) << 12) | \ + ((w2 & 0x000000FFU) << 8) | ((w2 & 0x0000FF00U) >> 8); \ + w3 = ((w3 & 0xFFF00000U) >> 4) | ((w3 & 0x000F0000U) << 12) | \ + ((w3 & 0x000000FFU) << 8) | ((w3 & 0x0000FF00U) >> 8); \ + } while (0) +#endif + +void gift128b_decrypt + (const gift128b_key_schedule_t *ks, unsigned char *output, + const unsigned char *input) +{ + uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; + + /* Copy the ciphertext into the state buffer and convert from big endian */ + s0 = be_load_word32(input); + s1 = be_load_word32(input + 4); + s2 = be_load_word32(input + 8); + s3 = be_load_word32(input + 12); + + /* Generate the decryption key at the end of the last round */ + gift128b_load_and_forward_schedule(); + + /* Perform all 40 rounds */ + for (round = 40; round > 0; --round) { + /* Rotate the key schedule backwards */ + temp = w0; + w0 = w1; + w1 = w2; + w2 = w3; + w3 = ((temp & 0x3FFF0000U) << 2) | ((temp & 0xC0000000U) >> 14) | + ((temp & 0x0000FFF0U) >> 4) | ((temp & 0x0000000FU) << 12); + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round - 1]; + + /* InvPermBits - apply the inverse of the 128-bit permutation */ + INV_PERM0(s0); + INV_PERM1(s1); + INV_PERM2(s2); + INV_PERM3(s3); + + /* InvSubCells - apply the inverse of the S-box */ + temp = s0; + s0 = s3; + s3 = temp; + s2 ^= s0 & s1; + s3 ^= 0xFFFFFFFFU; + s1 ^= s3; + s3 ^= s2; + s2 ^= s0 | s1; + s0 ^= s1 
& s3; + s1 ^= s0 & s2; + } + + /* Pack the state into the plaintext buffer in big endian */ + be_store_word32(output, s0); + be_store_word32(output + 4, s1); + be_store_word32(output + 8, s2); + be_store_word32(output + 12, s3); +} + void gift128t_decrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input, unsigned char tweak) + const unsigned char *input, uint32_t tweak) { - uint32_t s0, s1, s2, s3, tword; + uint32_t s0, s1, s2, s3; + uint32_t w0, w1, w2, w3; + uint32_t temp; + uint8_t round; - /* Copy the ciphertext into the state buffer and convert from nibbles */ + /* Copy the ciphertext into the state buffer and convert from nibbles */ gift128n_to_words(output, input); s0 = be_load_word32(output); s1 = be_load_word32(output + 4); s2 = be_load_word32(output + 8); s3 = be_load_word32(output + 12); - /* Perform all 40 rounds five at a time using the fixsliced method. - * Every 5 rounds except the first we add the tweak value to the state */ - tword = GIFT128_tweaks[tweak]; - gift128b_decrypt_5_rounds(ks->k + 70, GIFT128_RC + 35); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 60, GIFT128_RC + 30); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 50, GIFT128_RC + 25); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 40, GIFT128_RC + 20); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 30, GIFT128_RC + 15); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 20, GIFT128_RC + 10); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k + 10, GIFT128_RC + 5); - s0 ^= tword; - gift128b_decrypt_5_rounds(ks->k, GIFT128_RC); + /* Generate the decryption key at the end of the last round */ + gift128b_load_and_forward_schedule(); + + /* Perform all 40 rounds */ + for (round = 40; round > 0; --round) { + /* Rotate the key schedule backwards */ + temp = w0; + w0 = w1; + w1 = w2; + w2 = w3; + w3 = ((temp & 0x3FFF0000U) << 2) | ((temp & 0xC0000000U) >> 14) | + ((temp & 0x0000FFF0U) >> 4) | ((temp & 0x0000000FU) << 12); + + /* AddTweak - XOR in the tweak every 5 rounds except the last */ + if ((round % 5) == 0 && round < 40) + s0 ^= tweak; + + /* AddRoundKey - XOR in the key schedule and the round constant */ + s2 ^= w1; + s1 ^= w3; + s3 ^= 0x80000000U ^ GIFT128_RC[round - 1]; + + /* InvPermBits - apply the inverse of the 128-bit permutation */ + INV_PERM0(s0); + INV_PERM1(s1); + INV_PERM2(s2); + INV_PERM3(s3); + + /* InvSubCells - apply the inverse of the S-box */ + temp = s0; + s0 = s3; + s3 = temp; + s2 ^= s0 & s1; + s3 ^= 0xFFFFFFFFU; + s1 ^= s3; + s3 ^= s2; + s2 ^= s0 | s1; + s0 ^= s1 & s3; + s1 ^= s0 & s2; + } /* Pack the state into the plaintext buffer in nibble form */ be_store_word32(output, s0); @@ -847,3 +1492,7 @@ void gift128t_decrypt be_store_word32(output + 12, s3); gift128n_to_nibbles(output, output); } + +#endif /* GIFT128_VARIANT_SMALL || GIFT128_VARIANT_TINY */ + +#endif /* !GIFT128_VARIANT_ASM */ diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.h b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.h index 1ac40e5..f57d143 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.h +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-gift128.h @@ -47,11 +47,13 @@ * in any of the NIST submissions so we don't bother with it in this library.
* * References: https://eprint.iacr.org/2017/622.pdf, + * https://eprint.iacr.org/2020/412.pdf, * https://giftcipher.github.io/gift/ */ #include <stddef.h> #include <stdint.h> +#include "internal-gift128-config.h" #ifdef __cplusplus extern "C" { @@ -63,16 +65,23 @@ extern "C" { #define GIFT128_BLOCK_SIZE 16 /** - * \brief Number of round keys for the fixsliced representation of GIFT-128. + * \var GIFT128_ROUND_KEYS + * \brief Number of round keys for the GIFT-128 key schedule. */ +#if GIFT128_VARIANT == GIFT128_VARIANT_TINY +#define GIFT128_ROUND_KEYS 4 +#elif GIFT128_VARIANT == GIFT128_VARIANT_SMALL +#define GIFT128_ROUND_KEYS 20 +#else #define GIFT128_ROUND_KEYS 80 +#endif /** * \brief Structure of the key schedule for GIFT-128 (bit-sliced). */ typedef struct { - /** Pre-computed round keys in the fixsliced form */ + /** Pre-computed round keys for bit-sliced GIFT-128 */ uint32_t k[GIFT128_ROUND_KEYS]; } gift128b_key_schedule_t; @@ -81,14 +90,9 @@ typedef struct * \brief Initializes the key schedule for GIFT-128 (bit-sliced). * * \param ks Points to the key schedule to initialize. - * \param key Points to the key data. - * \param key_len Length of the key data, which must be 16. - * - * \return Non-zero on success or zero if there is something wrong - * with the parameters. + * \param key Points to the 16 bytes of the key data. */ -int gift128b_init - (gift128b_key_schedule_t *ks, const unsigned char *key, size_t key_len); +void gift128b_init(gift128b_key_schedule_t *ks, const unsigned char *key); /** * \brief Encrypts a 128-bit block with GIFT-128 (bit-sliced). @@ -145,14 +149,9 @@ typedef gift128b_key_schedule_t gift128n_key_schedule_t; * \brief Initializes the key schedule for GIFT-128 (nibble-based). * * \param ks Points to the key schedule to initialize. - * \param key Points to the key data. - * \param key_len Length of the key data, which must be 16. - * - * \return Non-zero on success or zero if there is something wrong - * with the parameters. + * \param key Points to the 16 bytes of the key data. */ -int gift128n_init - (gift128n_key_schedule_t *ks, const unsigned char *key, size_t key_len); +void gift128n_init(gift128n_key_schedule_t *ks, const unsigned char *key); /** * \brief Encrypts a 128-bit block with GIFT-128 (nibble-based).
@@ -182,13 +181,31 @@ void gift128n_decrypt (const gift128n_key_schedule_t *ks, unsigned char *output, const unsigned char *input); +/* 4-bit tweak values expanded to 32-bit for TweGIFT-128 */ +#define GIFT128T_TWEAK_0 0x00000000 /**< TweGIFT-128 tweak value 0 */ +#define GIFT128T_TWEAK_1 0xe1e1e1e1 /**< TweGIFT-128 tweak value 1 */ +#define GIFT128T_TWEAK_2 0xd2d2d2d2 /**< TweGIFT-128 tweak value 2 */ +#define GIFT128T_TWEAK_3 0x33333333 /**< TweGIFT-128 tweak value 3 */ +#define GIFT128T_TWEAK_4 0xb4b4b4b4 /**< TweGIFT-128 tweak value 4 */ +#define GIFT128T_TWEAK_5 0x55555555 /**< TweGIFT-128 tweak value 5 */ +#define GIFT128T_TWEAK_6 0x66666666 /**< TweGIFT-128 tweak value 6 */ +#define GIFT128T_TWEAK_7 0x87878787 /**< TweGIFT-128 tweak value 7 */ +#define GIFT128T_TWEAK_8 0x78787878 /**< TweGIFT-128 tweak value 8 */ +#define GIFT128T_TWEAK_9 0x99999999 /**< TweGIFT-128 tweak value 9 */ +#define GIFT128T_TWEAK_10 0xaaaaaaaa /**< TweGIFT-128 tweak value 10 */ +#define GIFT128T_TWEAK_11 0x4b4b4b4b /**< TweGIFT-128 tweak value 11 */ +#define GIFT128T_TWEAK_12 0xcccccccc /**< TweGIFT-128 tweak value 12 */ +#define GIFT128T_TWEAK_13 0x2d2d2d2d /**< TweGIFT-128 tweak value 13 */ +#define GIFT128T_TWEAK_14 0x1e1e1e1e /**< TweGIFT-128 tweak value 14 */ +#define GIFT128T_TWEAK_15 0xffffffff /**< TweGIFT-128 tweak value 15 */ + /** * \brief Encrypts a 128-bit block with TweGIFT-128 (tweakable variant). * * \param ks Points to the GIFT-128 key schedule. * \param output Output buffer which must be at least 16 bytes in length. * \param input Input buffer which must be at least 16 bytes in length. - * \param tweak 4-bit tweak value. + * \param tweak 4-bit tweak value expanded to 32-bit. * * The \a input and \a output buffers can be the same buffer for * in-place encryption. @@ -200,7 +217,7 @@ void gift128n_decrypt */ void gift128t_encrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input, unsigned char tweak); + const unsigned char *input, uint32_t tweak); /** * \brief Decrypts a 128-bit block with TweGIFT-128 (tweakable variant). @@ -208,7 +225,7 @@ void gift128t_encrypt * \param ks Points to the GIFT-128 key schedule. * \param output Output buffer which must be at least 16 bytes in length. * \param input Input buffer which must be at least 16 bytes in length. - * \param tweak 4-bit tweak value. + * \param tweak 4-bit tweak value expanded to 32-bit. * * The \a input and \a output buffers can be the same buffer for * in-place encryption. @@ -220,7 +237,7 @@ void gift128t_encrypt */ void gift128t_decrypt (const gift128n_key_schedule_t *ks, unsigned char *output, - const unsigned char *input, unsigned char tweak); + const unsigned char *input, uint32_t tweak); #ifdef __cplusplus } diff --git a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-util.h b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-util.h index e79158c..e30166d 100644 --- a/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-util.h +++ b/hyena/Implementations/crypto_aead/hyenav1/rhys/internal-util.h @@ -238,6 +238,17 @@ } \ } while (0) +/* Rotation functions need to be optimised for best performance on AVR. + * The most efficient rotations are where the number of bits is 1 or a + * multiple of 8, so we compose the efficient rotations to produce all + * other rotation counts of interest. 
*/ + +#if defined(__AVR__) +#define LW_CRYPTO_ROTATE32_COMPOSED 1 +#else +#define LW_CRYPTO_ROTATE32_COMPOSED 0 +#endif + /* Rotation macros for 32-bit arguments */ /* Generic left rotate */ @@ -254,6 +265,8 @@ (_temp >> (bits)) | (_temp << (32 - (bits))); \ })) +#if !LW_CRYPTO_ROTATE32_COMPOSED + /* Left rotate by a specific number of bits. These macros may be replaced * with more efficient ones on platforms that lack a barrel shifter */ #define leftRotate1(a) (leftRotate((a), 1)) @@ -322,6 +335,138 @@ #define rightRotate30(a) (rightRotate((a), 30)) #define rightRotate31(a) (rightRotate((a), 31)) +#else /* LW_CRYPTO_ROTATE32_COMPOSED */ + +/* Composed rotation macros where 1 and 8 are fast, but others are slow */ + +/* Left rotate by 1 */ +#define leftRotate1(a) (leftRotate((a), 1)) + +/* Left rotate by 2 */ +#define leftRotate2(a) (leftRotate(leftRotate((a), 1), 1)) + +/* Left rotate by 3 */ +#define leftRotate3(a) (leftRotate(leftRotate(leftRotate((a), 1), 1), 1)) + +/* Left rotate by 4 */ +#define leftRotate4(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 1), 1), 1), 1)) + +/* Left rotate by 5: Rotate left by 8, then right by 3 */ +#define leftRotate5(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 8), 1), 1), 1)) + +/* Left rotate by 6: Rotate left by 8, then right by 2 */ +#define leftRotate6(a) (rightRotate(rightRotate(leftRotate((a), 8), 1), 1)) + +/* Left rotate by 7: Rotate left by 8, then right by 1 */ +#define leftRotate7(a) (rightRotate(leftRotate((a), 8), 1)) + +/* Left rotate by 8 */ +#define leftRotate8(a) (leftRotate((a), 8)) + +/* Left rotate by 9: Rotate left by 8, then left by 1 */ +#define leftRotate9(a) (leftRotate(leftRotate((a), 8), 1)) + +/* Left rotate by 10: Rotate left by 8, then left by 2 */ +#define leftRotate10(a) (leftRotate(leftRotate(leftRotate((a), 8), 1), 1)) + +/* Left rotate by 11: Rotate left by 8, then left by 3 */ +#define leftRotate11(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 8), 1), 1), 1)) + +/* Left rotate by 12: Rotate left by 16, then right by 4 */ +#define leftRotate12(a) (rightRotate(rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +/* Left rotate by 13: Rotate left by 16, then right by 3 */ +#define leftRotate13(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1)) + +/* Left rotate by 14: Rotate left by 16, then right by 2 */ +#define leftRotate14(a) (rightRotate(rightRotate(leftRotate((a), 16), 1), 1)) + +/* Left rotate by 15: Rotate left by 16, then right by 1 */ +#define leftRotate15(a) (rightRotate(leftRotate((a), 16), 1)) + +/* Left rotate by 16 */ +#define leftRotate16(a) (leftRotate((a), 16)) + +/* Left rotate by 17: Rotate left by 16, then left by 1 */ +#define leftRotate17(a) (leftRotate(leftRotate((a), 16), 1)) + +/* Left rotate by 18: Rotate left by 16, then left by 2 */ +#define leftRotate18(a) (leftRotate(leftRotate(leftRotate((a), 16), 1), 1)) + +/* Left rotate by 19: Rotate left by 16, then left by 3 */ +#define leftRotate19(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1)) + +/* Left rotate by 20: Rotate left by 16, then left by 4 */ +#define leftRotate20(a) (leftRotate(leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +/* Left rotate by 21: Rotate left by 24, then right by 3 */ +#define leftRotate21(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 24), 1), 1), 1)) + +/* Left rotate by 22: Rotate left by 24, then right by 2 */ +#define leftRotate22(a) (rightRotate(rightRotate(leftRotate((a), 24), 1), 1)) + 
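+/* Editorial sketch (not from the patch author): the composed forms above
+ * agree with the generic rotations because rotation counts add modulo 32.
+ * For example, "left 16 then right 3" is left 13, and "left 24 then
+ * right 2" is left 22, which a quick check confirms:
+ *
+ *     uint32_t x = 0x12345678;
+ *     assert(leftRotate13(x) == leftRotate((x), 13));
+ *     assert(leftRotate22(x) == leftRotate((x), 22));
+ */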
+/* Left rotate by 23: Rotate left by 24, then right by 1 */ +#define leftRotate23(a) (rightRotate(leftRotate((a), 24), 1)) + +/* Left rotate by 24 */ +#define leftRotate24(a) (leftRotate((a), 24)) + +/* Left rotate by 25: Rotate left by 24, then left by 1 */ +#define leftRotate25(a) (leftRotate(leftRotate((a), 24), 1)) + +/* Left rotate by 26: Rotate left by 24, then left by 2 */ +#define leftRotate26(a) (leftRotate(leftRotate(leftRotate((a), 24), 1), 1)) + +/* Left rotate by 27: Rotate left by 24, then left by 3 */ +#define leftRotate27(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 24), 1), 1), 1)) + +/* Left rotate by 28: Rotate right by 4 */ +#define leftRotate28(a) (rightRotate(rightRotate(rightRotate(rightRotate((a), 1), 1), 1), 1)) + +/* Left rotate by 29: Rotate right by 3 */ +#define leftRotate29(a) (rightRotate(rightRotate(rightRotate((a), 1), 1), 1)) + +/* Left rotate by 30: Rotate right by 2 */ +#define leftRotate30(a) (rightRotate(rightRotate((a), 1), 1)) + +/* Left rotate by 31: Rotate right by 1 */ +#define leftRotate31(a) (rightRotate((a), 1)) + +/* Define the 32-bit right rotations in terms of left rotations */ +#define rightRotate1(a) (leftRotate31((a))) +#define rightRotate2(a) (leftRotate30((a))) +#define rightRotate3(a) (leftRotate29((a))) +#define rightRotate4(a) (leftRotate28((a))) +#define rightRotate5(a) (leftRotate27((a))) +#define rightRotate6(a) (leftRotate26((a))) +#define rightRotate7(a) (leftRotate25((a))) +#define rightRotate8(a) (leftRotate24((a))) +#define rightRotate9(a) (leftRotate23((a))) +#define rightRotate10(a) (leftRotate22((a))) +#define rightRotate11(a) (leftRotate21((a))) +#define rightRotate12(a) (leftRotate20((a))) +#define rightRotate13(a) (leftRotate19((a))) +#define rightRotate14(a) (leftRotate18((a))) +#define rightRotate15(a) (leftRotate17((a))) +#define rightRotate16(a) (leftRotate16((a))) +#define rightRotate17(a) (leftRotate15((a))) +#define rightRotate18(a) (leftRotate14((a))) +#define rightRotate19(a) (leftRotate13((a))) +#define rightRotate20(a) (leftRotate12((a))) +#define rightRotate21(a) (leftRotate11((a))) +#define rightRotate22(a) (leftRotate10((a))) +#define rightRotate23(a) (leftRotate9((a))) +#define rightRotate24(a) (leftRotate8((a))) +#define rightRotate25(a) (leftRotate7((a))) +#define rightRotate26(a) (leftRotate6((a))) +#define rightRotate27(a) (leftRotate5((a))) +#define rightRotate28(a) (leftRotate4((a))) +#define rightRotate29(a) (leftRotate3((a))) +#define rightRotate30(a) (leftRotate2((a))) +#define rightRotate31(a) (leftRotate1((a))) + +#endif /* LW_CRYPTO_ROTATE32_COMPOSED */ + /* Rotation macros for 64-bit arguments */ /* Generic left rotate */ diff --git a/hyena/Implementations/crypto_aead/hyenav2/rhys/encrypt.c b/hyena/Implementations/crypto_aead/hyenav2/rhys/encrypt.c index db50784..188335e 100644 --- a/hyena/Implementations/crypto_aead/hyenav2/rhys/encrypt.c +++ b/hyena/Implementations/crypto_aead/hyenav2/rhys/encrypt.c @@ -9,7 +9,7 @@ int crypto_aead_encrypt const unsigned char *npub, const unsigned char *k) { - return hyena_aead_encrypt + return hyena_v2_aead_encrypt (c, clen, m, mlen, ad, adlen, nsec, npub, k); } @@ -21,6 +21,6 @@ int crypto_aead_decrypt const unsigned char *npub, const unsigned char *k) { - return hyena_aead_decrypt + return hyena_v2_aead_decrypt (m, mlen, nsec, c, clen, ad, adlen, npub, k); } diff --git a/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.c b/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.c index db5ba2b..eaafb36 100644 --- 
a/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.c +++ b/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.c @@ -25,14 +25,24 @@ #include "internal-util.h" #include <string.h> -aead_cipher_t const hyena_cipher = { - "HYENA", +aead_cipher_t const hyena_v1_cipher = { + "HYENA-v1", HYENA_KEY_SIZE, HYENA_NONCE_SIZE, HYENA_TAG_SIZE, AEAD_FLAG_LITTLE_ENDIAN, - hyena_aead_encrypt, - hyena_aead_decrypt + hyena_v1_aead_encrypt, + hyena_v1_aead_decrypt +}; + +aead_cipher_t const hyena_v2_cipher = { + "HYENA-v2", + HYENA_KEY_SIZE, + HYENA_NONCE_SIZE, + HYENA_TAG_SIZE, + AEAD_FLAG_LITTLE_ENDIAN, + hyena_v2_aead_encrypt, + hyena_v2_aead_decrypt }; /** @@ -69,7 +79,236 @@ static void hyena_triple_delta(unsigned char D[8]) } /** - * \brief Process the associated data for HYENA. + * \brief Process the associated data for HYENA-v1. + * + * \param ks Key schedule for the GIFT-128 cipher. + * \param Y Internal hash state of HYENA. + * \param D Internal hash state of HYENA. + * \param ad Points to the associated data. + * \param adlen Length of the associated data in bytes. + */ +static void hyena_v1_process_ad + (const gift128n_key_schedule_t *ks, unsigned char Y[16], + unsigned char D[8], const unsigned char *ad, + unsigned long long adlen) +{ + unsigned char feedback[16]; + hyena_double_delta(D); + while (adlen > 16) { + memcpy(feedback, ad, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + gift128n_encrypt(ks, Y, Y); + hyena_double_delta(D); + ad += 16; + adlen -= 16; + } + if (adlen == 16) { + hyena_double_delta(D); + memcpy(feedback, ad, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + } else { + unsigned temp = (unsigned)adlen; + hyena_double_delta(D); + hyena_double_delta(D); + memcpy(feedback, ad, temp); + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + if (temp > 8) + lw_xor_block(feedback + 8, Y + 8, temp - 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + } +} + +int hyena_v1_aead_encrypt + (unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + gift128n_key_schedule_t ks; + unsigned char Y[16]; + unsigned char D[8]; + unsigned char feedback[16]; + unsigned index; + (void)nsec; + + /* Set the length of the returned ciphertext */ + *clen = mlen + HYENA_TAG_SIZE; + + /* Set up the key schedule and use it to encrypt the nonce */ + gift128n_init(&ks, k); + Y[0] = 0; + if (adlen == 0) + Y[0] |= 0x01; + if (adlen == 0 && mlen == 0) + Y[0] |= 0x02; + Y[1] = 0; + Y[2] = 0; + Y[3] = 0; + memcpy(Y + 4, npub, HYENA_NONCE_SIZE); + gift128n_encrypt(&ks, Y, Y); + memcpy(D, Y + 8, 8); + + /* Process the associated data */ + hyena_v1_process_ad(&ks, Y, D, ad, adlen); + + /* Encrypt the plaintext to produce the ciphertext */ + if (mlen > 0) { + while (mlen > 16) { + gift128n_encrypt(&ks, Y, Y); + hyena_double_delta(D); + memcpy(feedback, m, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, 16); + lw_xor_block(Y, feedback, 16); + c += 16; + m += 16; + mlen -= 16; + } + gift128n_encrypt(&ks, Y, Y); + if (mlen == 16) { + hyena_double_delta(D); + hyena_double_delta(D); + memcpy(feedback, m, 16); + lw_xor_block(feedback + 8, Y + 8, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, 16);
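+ /* Note (editorial): at this point "feedback" holds the plaintext
+ * block with its upper half masked by the old upper half of Y and
+ * by the delta D; folding it into Y below forms the next chaining
+ * value, while the line above produced the ciphertext c = m ^ Y. */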
+ lw_xor_block(Y, feedback, 16); + c += 16; + } else { + unsigned temp = (unsigned)mlen; + hyena_double_delta(D); + hyena_double_delta(D); + hyena_double_delta(D); + memcpy(feedback, m, temp); + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + if (temp > 8) + lw_xor_block(feedback + 8, Y + 8, temp - 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block_2_src(c, m, Y, temp); + lw_xor_block(Y, feedback, 16); + c += temp; + } + } + + /* Swap the two halves of Y and generate the authentication tag */ + for (index = 0; index < 8; ++index) { + unsigned char temp1 = Y[index]; + unsigned char temp2 = Y[index + 8]; + Y[index] = temp2; + Y[index + 8] = temp1; + } + gift128n_encrypt(&ks, c, Y); + return 0; +} + +int hyena_v1_aead_decrypt + (unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + gift128n_key_schedule_t ks; + unsigned char Y[16]; + unsigned char D[8]; + unsigned char feedback[16]; + unsigned char *mtemp; + unsigned index; + (void)nsec; + + /* Validate the ciphertext length and set the return "mlen" value */ + if (clen < HYENA_TAG_SIZE) + return -1; + *mlen = clen - HYENA_TAG_SIZE; + + /* Set up the key schedule and use it to encrypt the nonce */ + gift128n_init(&ks, k); + Y[0] = 0; + if (adlen == 0) + Y[0] |= 0x01; + if (adlen == 0 && clen == HYENA_TAG_SIZE) + Y[0] |= 0x02; + Y[1] = 0; + Y[2] = 0; + Y[3] = 0; + memcpy(Y + 4, npub, HYENA_NONCE_SIZE); + gift128n_encrypt(&ks, Y, Y); + memcpy(D, Y + 8, 8); + + /* Process the associated data */ + hyena_v1_process_ad(&ks, Y, D, ad, adlen); + + /* Decrypt the ciphertext to produce the plaintext */ + clen -= HYENA_TAG_SIZE; + mtemp = m; + if (clen > 0) { + while (clen > 16) { + gift128n_encrypt(&ks, Y, Y); + hyena_double_delta(D); + memcpy(feedback + 8, c + 8, 8); + lw_xor_block_2_src(m, c, Y, 16); + memcpy(feedback, m, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += 16; + m += 16; + clen -= 16; + } + gift128n_encrypt(&ks, Y, Y); + if (clen == 16) { + hyena_double_delta(D); + hyena_double_delta(D); + memcpy(feedback + 8, c + 8, 8); + lw_xor_block_2_src(m, c, Y, 16); + memcpy(feedback, m, 8); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += 16; + } else { + unsigned temp = (unsigned)clen; + hyena_double_delta(D); + hyena_double_delta(D); + hyena_double_delta(D); + if (temp > 8) { + memcpy(feedback + 8, c + 8, temp - 8); + lw_xor_block_2_src(m, c, Y, temp); + memcpy(feedback, m, 8); + } else { + lw_xor_block_2_src(m, c, Y, temp); + memcpy(feedback, m, temp); + } + feedback[temp] = 0x01; + memset(feedback + temp + 1, 0, 15 - temp); + lw_xor_block(feedback + 8, D, 8); + lw_xor_block(Y, feedback, 16); + c += temp; + } + } + + /* Swap the two halves of Y and check the authentication tag */ + for (index = 0; index < 8; ++index) { + unsigned char temp1 = Y[index]; + unsigned char temp2 = Y[index + 8]; + Y[index] = temp2; + Y[index + 8] = temp1; + } + gift128n_encrypt(&ks, Y, Y); + return aead_check_tag(mtemp, *mlen, Y, c, HYENA_TAG_SIZE); +} + +/** + * \brief Process the associated data for HYENA-v2. * * \param ks Key schedule for the GIFT-128 cipher. * \param Y Internal hash state of HYENA. @@ -77,7 +316,7 @@ static void hyena_triple_delta(unsigned char D[8]) * \param ad Points to the associated data. * \param adlen Length of the associated data in bytes. 
 */
-static void hyena_process_ad
+static void hyena_v2_process_ad
     (const gift128n_key_schedule_t *ks, unsigned char Y[16],
      unsigned char D[8], const unsigned char *ad,
      unsigned long long adlen)
@@ -113,7 +352,7 @@ static void hyena_process_ad
     }
 }
 
-int hyena_aead_encrypt
+int hyena_v2_aead_encrypt
     (unsigned char *c, unsigned long long *clen,
      const unsigned char *m, unsigned long long mlen,
      const unsigned char *ad, unsigned long long adlen,
@@ -146,7 +385,7 @@ int hyena_aead_encrypt
     memcpy(D, Y + 8, 8);
 
     /* Process the associated data */
-    hyena_process_ad(&ks, Y, D, ad, adlen);
+    hyena_v2_process_ad(&ks, Y, D, ad, adlen);
 
     /* Encrypt the plaintext to produce the ciphertext */
     if (mlen > 0) {
@@ -198,7 +437,7 @@ int hyena_aead_encrypt
     return 0;
 }
 
-int hyena_aead_decrypt
+int hyena_v2_aead_decrypt
     (unsigned char *m, unsigned long long *mlen,
      unsigned char *nsec,
      const unsigned char *c, unsigned long long clen,
@@ -234,7 +473,7 @@ int hyena_aead_decrypt
     memcpy(D, Y + 8, 8);
 
     /* Process the associated data */
-    hyena_process_ad(&ks, Y, D, ad, adlen);
+    hyena_v2_process_ad(&ks, Y, D, ad, adlen);
 
     /* Decrypt the ciphertext to produce the plaintext */
     clen -= HYENA_TAG_SIZE;
diff --git a/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.h b/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.h
index ee9bb9c..79585d5 100644
--- a/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.h
+++ b/hyena/Implementations/crypto_aead/hyenav2/rhys/hyena.h
@@ -33,6 +33,12 @@
 * GIFT-128 block cipher. The algorithm has a 128-bit key, a 96-bit nonce,
 * and a 128-bit authentication tag.
 *
+ * This library implements both the v1 and v2 versions of HYENA from the
+ * authors. The v1 version was submitted to the second round of the
+ * NIST Lightweight Cryptography Competition but was later found to be
+ * vulnerable to a forgery attack. The authors fixed the problem in v2,
+ * but too late to submit the update for the second round.
+ *
 * References: https://www.isical.ac.in/~lightweight/hyena/
 */
@@ -56,12 +62,76 @@ extern "C" {
 #define HYENA_NONCE_SIZE 12
 
 /**
- * \brief Meta-information block for the HYENA cipher.
+ * \brief Meta-information block for the HYENA-v1 cipher.
+ */
+extern aead_cipher_t const hyena_v1_cipher;
+
+/**
+ * \brief Meta-information block for the HYENA-v2 cipher.
+ */
+extern aead_cipher_t const hyena_v2_cipher;
+
+/**
+ * \brief Encrypts and authenticates a packet with HYENA-v1.
+ *
+ * \param c Buffer to receive the output.
+ * \param clen On exit, set to the length of the output which includes
+ * the ciphertext and the 16 byte authentication tag.
+ * \param m Buffer that contains the plaintext message to encrypt.
+ * \param mlen Length of the plaintext message in bytes.
+ * \param ad Buffer that contains associated data to authenticate
+ * along with the packet but which does not need to be encrypted.
+ * \param adlen Length of the associated data in bytes.
+ * \param nsec Secret nonce - not used by this algorithm.
+ * \param npub Points to the public nonce for the packet which must
+ * be 12 bytes in length.
+ * \param k Points to the 16 bytes of the key to use to encrypt the packet.
+ *
+ * \return 0 on success, or a negative value if there was an error in
+ * the parameters.
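+ *
+ * \note HYENA is a nonce-based AEAD scheme: a given key and nonce pair
+ * must be used to encrypt at most one packet, otherwise both the
+ * confidentiality and the authenticity guarantees are lost.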
+ *
+ * \sa hyena_v1_aead_decrypt()
 */
-extern aead_cipher_t const hyena_cipher;
+int hyena_v1_aead_encrypt
+    (unsigned char *c, unsigned long long *clen,
+     const unsigned char *m, unsigned long long mlen,
+     const unsigned char *ad, unsigned long long adlen,
+     const unsigned char *nsec,
+     const unsigned char *npub,
+     const unsigned char *k);
+
+/**
+ * \brief Decrypts and authenticates a packet with HYENA-v1.
+ *
+ * \param m Buffer to receive the plaintext message on output.
+ * \param mlen Receives the length of the plaintext message on output.
+ * \param nsec Secret nonce - not used by this algorithm.
+ * \param c Buffer that contains the ciphertext and authentication
+ * tag to decrypt.
+ * \param clen Length of the input data in bytes, which includes the
+ * ciphertext and the 16 byte authentication tag.
+ * \param ad Buffer that contains associated data to authenticate
+ * along with the packet but which does not need to be encrypted.
+ * \param adlen Length of the associated data in bytes.
+ * \param npub Points to the public nonce for the packet which must
+ * be 12 bytes in length.
+ * \param k Points to the 16 bytes of the key to use to decrypt the packet.
+ *
+ * \return 0 on success, -1 if the authentication tag was incorrect,
+ * or some other negative number if there was an error in the parameters.
+ *
+ * \sa hyena_v1_aead_encrypt()
+ */
+int hyena_v1_aead_decrypt
+    (unsigned char *m, unsigned long long *mlen,
+     unsigned char *nsec,
+     const unsigned char *c, unsigned long long clen,
+     const unsigned char *ad, unsigned long long adlen,
+     const unsigned char *npub,
+     const unsigned char *k);
 
 /**
- * \brief Encrypts and authenticates a packet with HYENA.
+ * \brief Encrypts and authenticates a packet with HYENA-v2.
 *
 * \param c Buffer to receive the output.
 * \param clen On exit, set to the length of the output which includes
@@ -81,7 +151,7 @@ extern aead_cipher_t const hyena_cipher;
 *
 * \sa hyena_aead_decrypt()
 */
-int hyena_aead_encrypt
+int hyena_v2_aead_encrypt
     (unsigned char *c, unsigned long long *clen,
      const unsigned char *m, unsigned long long mlen,
      const unsigned char *ad, unsigned long long adlen,
@@ -90,7 +160,7 @@ int hyena_aead_encrypt
     const unsigned char *k);
 
 /**
- * \brief Decrypts and authenticates a packet with HYENA.
+ * \brief Decrypts and authenticates a packet with HYENA-v2.
 *
 * \param m Buffer to receive the plaintext message on output.
 * \param mlen Receives the length of the plaintext message on output.
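Taken together, the renamed v1 entry points can be exercised with a short encrypt/decrypt round trip. The sketch below is illustrative only; the helper name, buffer sizes, and error handling are assumptions rather than part of the library:

#include <string.h>
#include "hyena.h"

/* Hypothetical helper: returns non-zero if a HYENA-v1 round trip works. */
int hyena_v1_round_trip(void)
{
    static const unsigned char key[HYENA_KEY_SIZE] = {0};
    static const unsigned char nonce[HYENA_NONCE_SIZE] = {0};
    static const unsigned char ad[4] = {1, 2, 3, 4};
    unsigned char msg[8] = {'h', 'y', 'e', 'n', 'a', '.', 'v', '1'};
    unsigned char ct[sizeof(msg) + HYENA_TAG_SIZE];
    unsigned char pt[sizeof(msg)];
    unsigned long long ctlen, ptlen;

    /* Encrypt and authenticate; nsec is unused and may be NULL */
    if (hyena_v1_aead_encrypt(ct, &ctlen, msg, sizeof(msg),
                              ad, sizeof(ad), 0, nonce, key) != 0)
        return 0;

    /* Decrypt and verify the 16-byte tag appended to the ciphertext */
    if (hyena_v1_aead_decrypt(pt, &ptlen, 0, ct, ctlen,
                              ad, sizeof(ad), nonce, key) != 0)
        return 0; /* authentication failed */

    return ptlen == sizeof(msg) && memcmp(pt, msg, sizeof(msg)) == 0;
}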
@@ -111,7 +181,7 @@ int hyena_aead_encrypt
 *
 * \sa hyena_aead_encrypt()
 */
-int hyena_aead_decrypt
+int hyena_v2_aead_decrypt
     (unsigned char *m, unsigned long long *mlen,
      unsigned char *nsec,
      const unsigned char *c, unsigned long long clen,
diff --git a/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256-avr.S b/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256-avr.S
new file mode 100644
index 0000000..5826dd3
--- /dev/null
+++ b/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256-avr.S
@@ -0,0 +1,2583 @@
+#if defined(__AVR__)
+#include <avr/io.h>
+/* Automatically generated - do not edit */
+ + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd
r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst 
r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 + lsr r5 + ror r0 + lsr r5 + ror 
r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor 
r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor 
r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor 
r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor 
r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld 
r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 
+ bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256.c b/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256.c +++ b/orange/Implementations/crypto_aead/orangezestv1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. 
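+ * (This constant and the C implementation that uses it are compiled
+ * only when __AVR__ is not defined; on AVR the generated assembly
+ * version of photon256_permute above is used instead.)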
 */
@@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE])
     /* Convert back from bit-sliced form to regular form */
     photon256_from_sliced(state, S.bytes);
 }
+
+#endif /* !__AVR__ */
diff --git a/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256-avr.S b/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256-avr.S
new file mode 100644
index 0000000..5826dd3
--- /dev/null
+++ b/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256-avr.S
@@ -0,0 +1,2583 @@
+#if defined(__AVR__)
+#include <avr/io.h>
+/* Automatically generated - do not edit */
+ + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 + ldd r19,Z+29 + ldd r20,Z+30 + ldd
r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 
+ bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 + lsr r5 + ror r0 + lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov 
r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor 
r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor 
r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor 
r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor 
r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + 
bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld 
r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256.c b/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256.c +++ b/orange/Implementations/crypto_hash/orangishv1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. 
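+ *
+ * PHOTON-256 runs 12 rounds over an 8x8 matrix of 4-bit cells; each
+ * round applies AddConstants, SubCells, ShiftRows and MixColumnsSerial.
+ * In the bit-sliced representation the four bits of every cell live in
+ * four separate word-sized planes, so SubCells becomes a fixed sequence
+ * of AND/OR/XOR/NOT operations on whole planes.  A minimal sketch of
+ * the round skeleton (helper names here are illustrative, not the
+ * actual internal API):
+ *
+ *     for (round = 0; round < PHOTON256_ROUNDS; ++round) {
+ *         add_round_constants(&S, round);  // AddConstants
+ *         sbox_sliced(&S);                 // SubCells on the planes
+ *         shift_rows_sliced(&S);           // rotate row r by r cells
+ *         mix_columns_serial(&S);          // MDS matrix over GF(2^4)
+ *     }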
*/ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256-avr.S b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256-avr.S new file mode 100644 index 0000000..5826dd3 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256-avr.S @@ -0,0 +1,2583 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 
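+/* The ldd/bst/bld runs in this prologue transpose the 32 state bytes
+   into bit-sliced order: each bst/bld pair moves a single bit through
+   the T flag, scattering the four bits of every 4-bit cell across the
+   four "plane" registers (r2..r15, r24, r25) so that the round
+   function can compute the S-box with plain boolean instructions. */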
+ ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld 
r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 
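+/* ShiftRows: in sliced form each row of a plane is one byte, and row r
+   is rotated by r bit positions.  Rows 1 and 2 were rotated above; the
+   lsr/ror/or run below rotates each row-3 byte (r5, r9, r13, r25) by
+   three positions, using r0 as scratch (r1 is the fixed zero register,
+   so the preceding "mov r0,r1" just clears the scratch byte). */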
+ lsr r5 + ror r0 + lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor 
r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor 
r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + 
bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld 
r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256.c b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256.c +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. 
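+ *
+ * The C implementation that follows is now wrapped in
+ * #if !defined(__AVR__): on AVR targets it is compiled out, and the
+ * generated internal-photon256-avr.S above supplies the same
+ * photon256_permute() entry point, so callers are unaffected by the
+ * backend switch.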
*/ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256-avr.S b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256-avr.S new file mode 100644 index 0000000..5826dd3 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256-avr.S @@ -0,0 +1,2583 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 + 
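+/* This file carries the same generated code as the rate128v1 copy
+   above (both new files share blob index 5826dd3): the state bytes at
+   Z+16..Z+31 are loaded and bit-transposed into the plane registers
+   here before the 12 rounds run. */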
ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld 
r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 
+ lsr r5 + ror r0 + lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor 
r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor 
r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + 
bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld 
r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256.c b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256.c +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. 
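For readers following the structure rather than the generated instructions: the guard just added above is one half of a pairing that recurs throughout this patch. Each new internal-photon256-avr.S has its entire contents wrapped in #if defined(__AVR__), while the matching internal-photon256.c gains #if !defined(__AVR__) around the bit-sliced implementation, so exactly one definition of photon256_permute survives preprocessing for any given target and callers use the same prototype either way. A minimal sketch of the C side of the pattern, with the function body elided:

/* internal-photon256.c -- portable implementation, compiled out on AVR */
#include "internal-photon256.h"
#include "internal-util.h"

#if !defined(__AVR__)

void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE])
{
    /* ... bit-sliced C implementation, unchanged ... */
}

#endif /* !__AVR__ */

The assembly file declares the same global symbol (.global photon256_permute) under #if defined(__AVR__), so the two files can sit side by side in every build without producing a duplicate definition. The same pairing is repeated below for the crypto_hash copy of the files.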
*/ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256-avr.S b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256-avr.S new file mode 100644 index 0000000..5826dd3 --- /dev/null +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256-avr.S @@ -0,0 +1,2583 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global photon256_permute + .type photon256_permute, @function +photon256_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 49 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+20 + ldd r19,Z+21 + ldd r20,Z+22 + ldd r21,Z+23 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+24 + ldd r19,Z+25 + ldd r20,Z+26 + ldd r21,Z+27 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+28 + 
ldd r19,Z+29 + ldd r20,Z+30 + ldd r21,Z+31 + bst r18,0 + bld r5,0 + bst r18,1 + bld r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + std Z+16,r2 + std Z+17,r3 + std Z+18,r4 + std Z+19,r5 + std Z+20,r6 + std Z+21,r7 + std Z+22,r8 + std Z+23,r9 + std Z+24,r10 + std Z+25,r11 + std Z+26,r12 + std Z+27,r13 + std Z+28,r14 + std Z+29,r15 + std Z+30,r24 + std Z+31,r25 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + bst r18,0 + bld r2,0 + bst r18,1 + bld r6,0 + bst r18,2 + bld r10,0 + bst r18,3 + bld r14,0 + bst r18,4 + bld r2,1 + bst r18,5 + bld r6,1 + bst r18,6 + bld r10,1 + bst r18,7 + bld r14,1 + bst r19,0 + bld r2,2 + bst r19,1 + bld r6,2 + bst r19,2 + bld r10,2 + bst r19,3 + bld r14,2 + bst r19,4 + bld r2,3 + bst r19,5 + bld r6,3 + bst r19,6 + bld r10,3 + bst r19,7 + bld r14,3 + bst r20,0 + bld r2,4 + bst r20,1 + bld r6,4 + bst r20,2 + bld r10,4 + bst r20,3 + bld r14,4 + bst r20,4 + bld r2,5 + bst r20,5 + bld r6,5 + bst r20,6 + bld r10,5 + bst r20,7 + bld r14,5 + bst r21,0 + bld r2,6 + bst r21,1 + bld r6,6 + bst r21,2 + bld r10,6 + bst r21,3 + bld r14,6 + bst r21,4 + bld r2,7 + bst r21,5 + bld r6,7 + bst r21,6 + bld r10,7 + bst r21,7 + bld r14,7 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + bst r18,0 + bld r3,0 + bst r18,1 + bld r7,0 + bst r18,2 + bld r11,0 + bst r18,3 + bld r15,0 + bst r18,4 + bld r3,1 + bst r18,5 + bld r7,1 + bst r18,6 + bld r11,1 + bst r18,7 + bld r15,1 + bst r19,0 + bld r3,2 + bst r19,1 + bld r7,2 + bst r19,2 + bld r11,2 + bst r19,3 + bld r15,2 + bst r19,4 + bld r3,3 + bst r19,5 + bld r7,3 + bst r19,6 + bld r11,3 + bst r19,7 + bld r15,3 + bst r20,0 + bld r3,4 + bst r20,1 + bld r7,4 + bst r20,2 + bld r11,4 + bst r20,3 + bld r15,4 + bst r20,4 + bld r3,5 + bst r20,5 + bld r7,5 + bst r20,6 + bld r11,5 + bst r20,7 + bld r15,5 + bst r21,0 + bld r3,6 + bst r21,1 + bld r7,6 + bst r21,2 + bld r11,6 + bst r21,3 + bld r15,6 + bst r21,4 + bld r3,7 + bst r21,5 + bld r7,7 + bst r21,6 + bld r11,7 + bst r21,7 + bld r15,7 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + bst r18,0 + bld r4,0 + bst r18,1 + bld r8,0 + bst r18,2 + bld r12,0 + bst r18,3 + bld r24,0 + bst r18,4 + bld r4,1 + bst r18,5 + bld r8,1 + bst r18,6 + bld r12,1 + bst r18,7 + bld r24,1 + bst r19,0 + bld r4,2 + bst r19,1 + bld r8,2 + bst r19,2 + bld r12,2 + bst r19,3 + bld r24,2 + bst r19,4 + bld r4,3 + bst r19,5 + bld r8,3 + bst r19,6 + bld r12,3 + bst r19,7 + bld r24,3 + bst r20,0 + bld r4,4 + bst r20,1 + bld r8,4 + bst r20,2 + bld r12,4 + bst r20,3 + bld r24,4 + bst r20,4 + bld r4,5 + bst r20,5 + bld r8,5 + bst r20,6 + bld r12,5 + bst r20,7 + bld r24,5 + bst r21,0 + bld r4,6 + bst r21,1 + bld r8,6 + bst r21,2 + bld r12,6 + bst r21,3 + bld r24,6 + bst r21,4 + bld r4,7 + bst r21,5 + bld r8,7 + bst r21,6 + bld r12,7 + bst r21,7 + bld r24,7 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + bst r18,0 + bld r5,0 + bst r18,1 + bld 
r9,0 + bst r18,2 + bld r13,0 + bst r18,3 + bld r25,0 + bst r18,4 + bld r5,1 + bst r18,5 + bld r9,1 + bst r18,6 + bld r13,1 + bst r18,7 + bld r25,1 + bst r19,0 + bld r5,2 + bst r19,1 + bld r9,2 + bst r19,2 + bld r13,2 + bst r19,3 + bld r25,2 + bst r19,4 + bld r5,3 + bst r19,5 + bld r9,3 + bst r19,6 + bld r13,3 + bst r19,7 + bld r25,3 + bst r20,0 + bld r5,4 + bst r20,1 + bld r9,4 + bst r20,2 + bld r13,4 + bst r20,3 + bld r25,4 + bst r20,4 + bld r5,5 + bst r20,5 + bld r9,5 + bst r20,6 + bld r13,5 + bst r20,7 + bld r25,5 + bst r21,0 + bld r5,6 + bst r21,1 + bld r9,6 + bst r21,2 + bld r13,6 + bst r21,3 + bld r25,6 + bst r21,4 + bld r5,7 + bst r21,5 + bld r9,7 + bst r21,6 + bld r13,7 + bst r21,7 + bld r25,7 + ldi r22,225 + ldi r23,240 + ldi r26,210 + ldi r27,150 + rcall 621f + ldi r22,195 + ldi r23,210 + ldi r26,240 + ldi r27,180 + rcall 621f + ldi r22,135 + ldi r23,150 + ldi r26,180 + ldi r27,240 + rcall 621f + ldi r22,30 + ldi r23,15 + ldi r26,45 + ldi r27,105 + rcall 621f + ldi r22,45 + ldi r23,60 + ldi r26,30 + ldi r27,90 + rcall 621f + ldi r22,75 + ldi r23,90 + ldi r26,120 + ldi r27,60 + rcall 621f + ldi r22,150 + ldi r23,135 + ldi r26,165 + ldi r27,225 + rcall 621f + ldi r22,60 + ldi r23,45 + ldi r26,15 + ldi r27,75 + rcall 621f + ldi r22,105 + ldi r23,120 + ldi r26,90 + ldi r27,30 + rcall 621f + ldi r22,210 + ldi r23,195 + ldi r26,225 + ldi r27,165 + rcall 621f + ldi r22,165 + ldi r23,180 + ldi r26,150 + ldi r27,210 + rcall 621f + ldi r22,90 + ldi r23,75 + ldi r26,105 + ldi r27,45 + rcall 621f + rjmp 1960f +621: + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r14,r18 + eor r15,r19 + eor r24,r20 + eor r25,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r16,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r16,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r16 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r16,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r16,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r16 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r16,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r16,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r16 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r16,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r16,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r16 + eor r25,r9 + bst r3,0 + lsr r3 + bld r3,7 + bst r7,0 + lsr r7 + bld r7,7 + bst r11,0 + lsr r11 + bld r11,7 + bst r15,0 + lsr r15 + bld r15,7 + mov r0,r1 + lsr r4 + ror r0 + lsr r4 + ror r0 + or r4,r0 + mov r0,r1 + lsr r8 + ror r0 + lsr r8 + ror r0 + or r8,r0 + mov r0,r1 + lsr r12 + ror r0 + lsr r12 + ror r0 + or r12,r0 + mov r0,r1 + lsr r24 + ror r0 + lsr r24 + ror r0 + or r24,r0 + mov r0,r1 
+ lsr r5 + ror r0 + lsr r5 + ror r0 + lsr r5 + ror r0 + or r5,r0 + mov r0,r1 + lsr r9 + ror r0 + lsr r9 + ror r0 + lsr r9 + ror r0 + or r9,r0 + mov r0,r1 + lsr r13 + ror r0 + lsr r13 + ror r0 + lsr r13 + ror r0 + or r13,r0 + mov r0,r1 + lsr r25 + ror r0 + lsr r25 + ror r0 + lsr r25 + ror r0 + or r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r2,r18 + eor r3,r19 + eor r4,r20 + eor r5,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r6,r18 + eor r7,r19 + eor r8,r20 + eor r9,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + movw r18,r22 + movw r20,r26 + andi r18,1 + andi r19,1 + andi r20,1 + andi r21,1 + eor r10,r18 + eor r11,r19 + eor r12,r20 + eor r13,r21 + lsr r22 + lsr r23 + lsr r26 + lsr r27 + eor r14,r22 + eor r15,r23 + eor r24,r26 + eor r25,r27 + eor r6,r10 + mov r0,r10 + and r0,r6 + eor r14,r0 + mov r18,r14 + and r14,r6 + eor r14,r10 + mov r22,r14 + eor r14,r2 + com r14 + mov r10,r14 + or r22,r2 + eor r2,r18 + eor r6,r2 + or r10,r6 + eor r10,r18 + eor r6,r22 + eor r14,r6 + eor r7,r11 + mov r0,r11 + and r0,r7 + eor r15,r0 + mov r19,r15 + and r15,r7 + eor r15,r11 + mov r22,r15 + eor r15,r3 + com r15 + mov r11,r15 + or r22,r3 + eor r3,r19 + eor r7,r3 + or r11,r7 + eor r11,r19 + eor r7,r22 + eor r15,r7 + eor r8,r12 + mov r0,r12 + and r0,r8 + eor r24,r0 + mov r20,r24 + and r24,r8 + eor r24,r12 + mov r22,r24 + eor r24,r4 + com r24 + mov r12,r24 + or r22,r4 + eor r4,r20 + eor r8,r4 + or r12,r8 + eor r12,r20 + eor r8,r22 + eor r24,r8 + eor r9,r13 + mov r0,r13 + and r0,r9 + eor r25,r0 + mov r21,r25 + and r25,r9 + eor r25,r13 + mov r22,r25 + eor r25,r5 + com r25 + mov r13,r25 + or r22,r5 + eor r5,r21 + eor r9,r5 + or r13,r9 + eor r13,r21 + eor r9,r22 + eor r25,r9 + swap r2 + swap r6 + swap r10 + swap r14 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r3 + adc r3,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r7 + adc r7,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r11 + adc r11,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r15 + adc r15,r1 + lsl r4 + adc r4,r1 + lsl r4 + adc r4,r1 + lsl r8 + adc r8,r1 + lsl r8 + adc r8,r1 + lsl r12 + adc r12,r1 + lsl r12 + adc r12,r1 + lsl r24 + adc r24,r1 + lsl r24 + adc r24,r1 + lsl r5 + adc r5,r1 + lsl r9 + adc r9,r1 + lsl r13 + adc r13,r1 + lsl r25 + adc r25,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r6,Y+2 + ldd r10,Y+3 + ldd r14,Y+4 + ldd r3,Y+5 + ldd r7,Y+6 + ldd r11,Y+7 + ldd r15,Y+8 + ldd r4,Y+9 + ldd r8,Y+10 + ldd r12,Y+11 + ldd r24,Y+12 + ldd r5,Y+13 + ldd r9,Y+14 + ldd r13,Y+15 + ldd r25,Y+16 + movw r22,r2 + movw r26,r4 + eor r22,r27 + mov r18,r27 + mov r19,r22 + mov r20,r23 + mov r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + movw r18,r2 + movw r20,r4 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor 
r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + movw r18,r26 + movw r20,r22 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + movw r18,r2 + movw r20,r4 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ldd r2,Y+17 + ldd r6,Y+18 + ldd r10,Y+19 + ldd r14,Y+20 + ldd r3,Y+21 + ldd r7,Y+22 + ldd r11,Y+23 + ldd r15,Y+24 + ldd r4,Y+25 + ldd r8,Y+26 + ldd r12,Y+27 + ldd r24,Y+28 + ldd r5,Y+29 + ldd r9,Y+30 + ldd r13,Y+31 + ldd r25,Y+32 + ld r18,Z + ldd r19,Z+4 + ldd r20,Z+8 + ldd r21,Z+12 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + st Z,r18 + std Z+4,r19 + std Z+8,r20 + std Z+12,r21 + ldd r18,Z+1 + ldd r19,Z+5 + ldd r20,Z+9 + ldd r21,Z+13 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor 
r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+1,r18 + std Z+5,r19 + std Z+9,r20 + std Z+13,r21 + ldd r18,Z+2 + ldd r19,Z+6 + ldd r20,Z+10 + ldd r21,Z+14 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+2,r18 + std Z+6,r19 + std Z+10,r20 + std Z+14,r21 + ldd r18,Z+3 + ldd r19,Z+7 + ldd r20,Z+11 + ldd r21,Z+15 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+3,r18 + std Z+7,r19 + std Z+11,r20 + std Z+15,r21 + ldd r18,Z+16 + ldd r19,Z+20 + ldd r20,Z+24 + ldd r21,Z+28 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+16,r18 + std Z+20,r19 + std Z+24,r20 + std Z+28,r21 + ldd r18,Z+17 + ldd r19,Z+21 + ldd r20,Z+25 + ldd r21,Z+29 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor 
r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + std Z+17,r18 + std Z+21,r19 + std Z+25,r20 + std Z+29,r21 + ldd r18,Z+18 + ldd r19,Z+22 + ldd r20,Z+26 + ldd r21,Z+30 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r6 + eor r19,r7 + eor r20,r8 + eor r21,r9 + eor r18,r10 + eor r19,r11 + eor r20,r12 + eor r21,r13 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + std Z+18,r18 + std Z+22,r19 + std Z+26,r20 + std Z+30,r21 + ldd r18,Z+19 + ldd r19,Z+23 + ldd r20,Z+27 + ldd r21,Z+31 + eor r18,r2 + eor r19,r3 + eor r20,r4 + eor r21,r5 + movw r22,r2 + movw r26,r4 + eor r22,r27 + eor r27,r26 + eor r18,r26 + eor r19,r27 + eor r20,r22 + eor r21,r23 + movw r22,r6 + movw r26,r8 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r27,r26 + eor r26,r23 + eor r18,r23 + eor r19,r26 + eor r20,r27 + eor r21,r22 + movw r22,r10 + movw r26,r12 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + eor r18,r14 + eor r19,r15 + eor r20,r24 + eor r21,r25 + movw r22,r14 + movw r26,r24 + eor r22,r27 + eor r18,r27 + eor r19,r22 + eor r20,r23 + eor r21,r26 + std Z+19,r18 + std Z+23,r19 + std Z+27,r20 + std Z+31,r21 + ld r2,Z + ldd r3,Z+1 + ldd r4,Z+2 + ldd r5,Z+3 + ldd r6,Z+4 + ldd r7,Z+5 + ldd r8,Z+6 + ldd r9,Z+7 + ldd r10,Z+8 + ldd r11,Z+9 + ldd r12,Z+10 + ldd r13,Z+11 + ldd r14,Z+12 + ldd r15,Z+13 + ldd r24,Z+14 + ldd r25,Z+15 + ret +1960: + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+4,r18 + std Z+5,r19 + std Z+6,r20 + std Z+7,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + 
bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+8,r18 + std Z+9,r19 + std Z+10,r20 + std Z+11,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+12,r18 + std Z+13,r19 + std Z+14,r20 + std Z+15,r21 + ldd r2,Z+16 + ldd r3,Z+17 + ldd r4,Z+18 + ldd r5,Z+19 + ldd r6,Z+20 + ldd r7,Z+21 + ldd r8,Z+22 + ldd r9,Z+23 + ldd r10,Z+24 + ldd r11,Z+25 + ldd r12,Z+26 + ldd r13,Z+27 + ldd r14,Z+28 + ldd r15,Z+29 + ldd r24,Z+30 + ldd r25,Z+31 + bst r2,0 + bld r18,0 + bst r6,0 + bld r18,1 + bst r10,0 + bld r18,2 + bst r14,0 + bld r18,3 + bst r2,1 + bld r18,4 + bst r6,1 + bld r18,5 + bst r10,1 + bld r18,6 + bst r14,1 + bld r18,7 + bst r2,2 + bld r19,0 + bst r6,2 + bld r19,1 + bst r10,2 + bld r19,2 + bst r14,2 + bld r19,3 + bst r2,3 + bld r19,4 + bst r6,3 + bld r19,5 + bst r10,3 + bld r19,6 + bst r14,3 + bld r19,7 + bst r2,4 + bld r20,0 + bst r6,4 + bld r20,1 + bst r10,4 + bld r20,2 + bst r14,4 + bld r20,3 + bst r2,5 + bld r20,4 + bst r6,5 + bld r20,5 + bst r10,5 + bld r20,6 + bst r14,5 + bld r20,7 + bst r2,6 + bld r21,0 + bst r6,6 + bld r21,1 + bst r10,6 + bld r21,2 + bst r14,6 + bld r21,3 + bst r2,7 + bld r21,4 + bst r6,7 + bld r21,5 + bst r10,7 + bld r21,6 + bst r14,7 + bld r21,7 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + bst r3,0 + bld r18,0 + bst r7,0 + bld r18,1 + bst r11,0 + bld r18,2 + bst r15,0 + bld r18,3 + bst r3,1 + bld r18,4 + bst r7,1 + bld r18,5 + bst r11,1 + bld r18,6 + bst r15,1 + bld r18,7 + bst r3,2 + bld r19,0 + bst r7,2 + bld r19,1 + bst r11,2 + bld r19,2 + bst r15,2 + bld r19,3 + bst r3,3 + bld r19,4 + bst r7,3 + bld r19,5 + bst r11,3 + bld r19,6 + bst r15,3 + bld r19,7 + bst r3,4 + bld r20,0 + bst r7,4 + bld r20,1 + bst r11,4 + bld r20,2 + bst r15,4 + bld r20,3 + bst r3,5 + bld r20,4 + bst r7,5 + bld r20,5 + bst r11,5 + bld r20,6 + bst r15,5 + bld r20,7 + bst r3,6 + bld r21,0 + bst r7,6 + bld r21,1 + bst r11,6 + bld r21,2 + bst r15,6 + bld r21,3 + bst r3,7 + bld r21,4 + bst r7,7 + bld r21,5 + bst r11,7 + bld r21,6 + bst r15,7 + bld r21,7 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + bst r4,0 + bld r18,0 + bst r8,0 + bld r18,1 + bst r12,0 + bld r18,2 + bst r24,0 + bld r18,3 + bst r4,1 + bld r18,4 + bst r8,1 + bld 
r18,5 + bst r12,1 + bld r18,6 + bst r24,1 + bld r18,7 + bst r4,2 + bld r19,0 + bst r8,2 + bld r19,1 + bst r12,2 + bld r19,2 + bst r24,2 + bld r19,3 + bst r4,3 + bld r19,4 + bst r8,3 + bld r19,5 + bst r12,3 + bld r19,6 + bst r24,3 + bld r19,7 + bst r4,4 + bld r20,0 + bst r8,4 + bld r20,1 + bst r12,4 + bld r20,2 + bst r24,4 + bld r20,3 + bst r4,5 + bld r20,4 + bst r8,5 + bld r20,5 + bst r12,5 + bld r20,6 + bst r24,5 + bld r20,7 + bst r4,6 + bld r21,0 + bst r8,6 + bld r21,1 + bst r12,6 + bld r21,2 + bst r24,6 + bld r21,3 + bst r4,7 + bld r21,4 + bst r8,7 + bld r21,5 + bst r12,7 + bld r21,6 + bst r24,7 + bld r21,7 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + bst r5,0 + bld r18,0 + bst r9,0 + bld r18,1 + bst r13,0 + bld r18,2 + bst r25,0 + bld r18,3 + bst r5,1 + bld r18,4 + bst r9,1 + bld r18,5 + bst r13,1 + bld r18,6 + bst r25,1 + bld r18,7 + bst r5,2 + bld r19,0 + bst r9,2 + bld r19,1 + bst r13,2 + bld r19,2 + bst r25,2 + bld r19,3 + bst r5,3 + bld r19,4 + bst r9,3 + bld r19,5 + bst r13,3 + bld r19,6 + bst r25,3 + bld r19,7 + bst r5,4 + bld r20,0 + bst r9,4 + bld r20,1 + bst r13,4 + bld r20,2 + bst r25,4 + bld r20,3 + bst r5,5 + bld r20,4 + bst r9,5 + bld r20,5 + bst r13,5 + bld r20,6 + bst r25,5 + bld r20,7 + bst r5,6 + bld r21,0 + bst r9,6 + bld r21,1 + bst r13,6 + bld r21,2 + bst r25,6 + bld r21,3 + bst r5,7 + bld r21,4 + bst r9,7 + bld r21,5 + bst r13,7 + bld r21,6 + bst r25,7 + bld r21,7 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size photon256_permute, .-photon256_permute + +#endif diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256.c b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256.c index b8743fe..5cb7dd1 100644 --- a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256.c +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/rhys/internal-photon256.c @@ -23,6 +23,8 @@ #include "internal-photon256.h" #include "internal-util.h" +#if !defined(__AVR__) + /** * \brief Number of rounds in the PHOTON-256 permutation in bit-sliced form. */ @@ -477,3 +479,5 @@ void photon256_permute(unsigned char state[PHOTON256_STATE_SIZE]) /* Convert back from bit-sliced form to regular form */ photon256_from_sliced(state, S.bytes); } + +#endif /* !__AVR__ */ diff --git a/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
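The backward round-constant step used by the inverse round macros above is worth a sanity check in isolation. The following standalone harness is an editorial sketch, not part of the patch; rc_forward() and rc_backward() simply restate the two rc updates that appear verbatim in the forward and inverse round macros, and since rc never holds more than 6 bits, checking all 64 values is exhaustive:

#include <stdint.h>
#include <stdio.h>

/* Forward rc update, as in skinny_128_384_round_tk_full() */
static uint8_t rc_forward(uint8_t rc)
{
    rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
    return rc & 0x3F;
}

/* Backward rc update, as in skinny_128_384_inv_round_tk_full() */
static uint8_t rc_backward(uint8_t rc)
{
    return (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
}

int main(void)
{
    unsigned v, ok = 1;
    for (v = 0; v < 64; ++v)
        ok &= (rc_backward(rc_forward((uint8_t)v)) == v);
    printf("%s\n", ok ? "rc round-trip OK" : "rc round-trip FAILED");
    return ok ? 0 : 1;
}

This also puts the decryption starting constants (rc = 0x15 for Skinny-128-384, 0x09 for Skinny-128-256) in context: each is the forward LFSR state positioned so that the first backward step in the decryption loop lands on the constant of the last encryption round.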
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
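In the forward key schedule each 32-bit word of TK2 passes through LFSR2 once every second round, so after all of the rounds every word has absorbed SKINNY_128_256_ROUNDS / 2 applications; the fast-forward loop here replays exactly that so the inverse rounds can then unwind it. The sketch below restates the idea standalone. It is an editorial sketch, not part of the patch: lfsr2() is written from the SKINNY specification's byte-wise rule (x7..x0 -> x6..x0, x7 XOR x5) and is assumed to match the skinny128_LFSR2 macro in internal-skinnyutil.h, and 48 is the specification round count for SKINNY-128-256.

#include <stdint.h>
#include <stdio.h>

#define ROUNDS 48 /* SKINNY-128-256 */

/* Byte-wise LFSR2 applied to a packed 32-bit tweakey word */
static uint32_t lfsr2(uint32_t x)
{
    return ((x << 1) & 0xFEFEFEFEU) ^ (((x >> 7) ^ (x >> 5)) & 0x01010101U);
}

/* Mirror of the fast-forward loop in skinny_128_256_decrypt() */
static void fast_forward_tk2(uint32_t TK2[4])
{
    unsigned round;
    for (round = 0; round < ROUNDS; round += 2) {
        TK2[0] = lfsr2(TK2[0]);
        TK2[1] = lfsr2(TK2[1]);
        TK2[2] = lfsr2(TK2[2]);
        TK2[3] = lfsr2(TK2[3]);
    }
}

int main(void)
{
    uint32_t TK2[4] = { 0x03020100U, 0x07060504U, 0x0B0A0908U, 0x0F0E0D0CU };
    fast_forward_tk2(TK2);
    printf("%08lx %08lx %08lx %08lx\n",
           (unsigned long)TK2[0], (unsigned long)TK2[1],
           (unsigned long)TK2[2], (unsigned long)TK2[3]);
    return 0;
}

The same accounting explains the precomputed-schedule pointer: decryption starts at k[SKINNY_128_256_ROUNDS * 2 - 8] and steps back by 8 words because each group of four unrolled inverse rounds consumes four 2-word subkeys, applied at offsets 3, 2, 1, 0.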
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusm1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
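The half-permutation trick used throughout these round macros is easiest to see at the byte level. In the tweakey permutation, every cell of the new top two rows is drawn from the old bottom two rows, and the old top rows drop into the bottom half in order; skinny128_permute_tk_half therefore only has to rewrite one half per round ("no swap"), with the alternating half argument (0, 1, 0, 1 across the unrolled calls) tracking which half is current. The byte-array restatement below is an editorial sketch, not part of the patch; PT is the standard SKINNY tweakey permutation, consistent with the inverse array PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] quoted in internal-skinnyutil.h:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* SKINNY tweakey cell permutation (specification-level view) */
static const uint8_t PT[16] = {
    9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7
};

static void permute_tk_bytes(uint8_t tk[16])
{
    uint8_t tmp[16];
    int i;
    memcpy(tmp, tk, sizeof(tmp));
    for (i = 0; i < 16; ++i)
        tk[i] = tmp[PT[i]];
}

int main(void)
{
    uint8_t tk[16];
    int i, ok = 1;
    for (i = 0; i < 16; ++i)
        tk[i] = (uint8_t)i;
    permute_tk_bytes(tk);
    /* New cells 0..7 come only from old cells 8..15, and old
       cells 0..7 land in cells 8..15 in order */
    for (i = 0; i < 8; ++i)
        ok &= (tk[i] >= 8) && (tk[i + 8] == i);
    printf("%s\n", ok ? "half-permutation property holds" : "property violated");
    return 0;
}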
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
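/*
 * Aside (not part of the patch): both the inverse round macros above and the
 * fast-forward loop below rely on skinny128_LFSR3 being the exact inverse of
 * skinny128_LFSR2 on every byte of a tweakey word, which is why the forward
 * rounds clock TK2 with LFSR2 and the inverse rounds undo it with LFSR3.
 * The standalone check below restates the two macros; their bodies are
 * assumed to match the definitions in internal-skinnyutil.h, which this
 * patch does not modify.
 */
#include <stdint.h>
#include <stdio.h>

#define skinny128_LFSR2(x) \
    do { \
        uint32_t _x = (x); \
        (x) = ((_x << 1) & 0xFEFEFEFEU) ^ \
             (((_x >> 7) ^ (_x >> 5)) & 0x01010101U); \
    } while (0)

#define skinny128_LFSR3(x) \
    do { \
        uint32_t _x = (x); \
        (x) = ((_x >> 1) & 0x7F7F7F7FU) ^ \
              (((_x << 7) ^ (_x << 1)) & 0x80808080U); \
    } while (0)

int main(void)
{
    uint32_t x = 0x03020100U; /* arbitrary starting word */
    unsigned i;
    for (i = 0; i < 1000; ++i) {
        uint32_t y = x;
        skinny128_LFSR2(y); /* one step of the TK2 schedule */
        skinny128_LFSR3(y); /* one inverse step */
        if (y != x) {
            printf("LFSR3 failed to invert LFSR2 at step %u\n", i);
            return 1;
        }
        skinny128_LFSR2(x); /* walk along the sequence */
    }
    printf("LFSR2/LFSR3 round-trip OK\n");
    return 0;
}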
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusm2/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
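/*
 * Aside (not part of the patch): the inverse rounds step the 6-bit round
 * constant LFSR backwards with rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20),
 * undoing the forward step used during encryption.  The decryption starting
 * values used in this patch (rc = 0x09 for the 48 rounds of Skinny-128-256,
 * rc = 0x15 for the 56 rounds of Skinny-128-384) are simply the forward LFSR
 * clocked ROUNDS + 1 times from zero, so that the first backward step lands
 * on the constant of the final round.  A minimal standalone check:
 */
#include <stdint.h>
#include <stdio.h>

static uint8_t rc_forward(uint8_t rc)
{
    rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
    return rc & 0x3F;
}

static uint8_t rc_backward(uint8_t rc)
{
    return (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
}

int main(void)
{
    uint8_t rc = 0;
    unsigned step;
    for (step = 1; step <= 57; ++step) {
        uint8_t next = rc_forward(rc);
        if (rc_backward(next) != rc) { /* backward must undo forward */
            printf("backward step is wrong at step %u\n", step);
            return 1;
        }
        rc = next;
        if (step == 49) /* 48 rounds + 1 extra step */
            printf("Skinny-128-256 decrypt starts from rc = 0x%02X\n", rc);
        if (step == 57) /* 56 rounds + 1 extra step */
            printf("Skinny-128-384 decrypt starts from rc = 0x%02X\n", rc);
    }
    return 0; /* prints 0x09 and 0x15 */
}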
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
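/*
 * Aside (not part of the patch): why four rounds at a time works.  The old
 * loop ended MixColumns by rotating the four state words (temp = s3 ^ s2;
 * s3 = s2; s2 = s1; s1 = s0; s0 = temp).  The unrolled macros drop that
 * rotation and instead pass the words to the next round in rotated order:
 * round(s0,s1,s2,s3), then round(s3,s0,s1,s2), and so on.  After four rounds
 * every word is back in its home variable, which is why the loops can step
 * by 4 (both round counts here, 48 and 56, are multiples of 4).  The check
 * below compares the two formulations of the mix step alone; the variable
 * names and test values are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

static void classic_mix(uint32_t s[4]) /* mix + word rotation, as removed */
{
    uint32_t temp;
    s[1] ^= s[2];
    s[2] ^= s[0];
    temp = s[3] ^ s[2];
    s[3] = s[2];
    s[2] = s[1];
    s[1] = s[0];
    s[0] = temp;
}

#define unrolled_mix(a, b, c, d) /* mix only, as in the new macros */ \
    do { (b) ^= (c); (c) ^= (a); (d) ^= (c); } while (0)

int main(void)
{
    uint32_t s[4] = {0x01234567U, 0x89ABCDEFU, 0xFEDCBA98U, 0x76543210U};
    uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
    int i;
    for (i = 0; i < 4; ++i)
        classic_mix(s);
    unrolled_mix(a, b, c, d); /* state is now logically (d, a, b, c) */
    unrolled_mix(d, a, b, c); /* ... then (c, d, a, b) */
    unrolled_mix(c, d, a, b); /* ... then (b, c, d, a) */
    unrolled_mix(b, c, d, a); /* ... and home again: (a, b, c, d) */
    if (s[0] == a && s[1] == b && s[2] == c && s[3] == d)
        printf("four unrolled mixes match four classic mixes\n");
    else
        printf("mismatch!\n");
    return 0;
}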
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusm3/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
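/*
 * Aside (not part of the patch): how skinny128_permute_tk_half relates to
 * the original full permutation.  skinny128_permute_tk moves the old bottom
 * half of a tweakey array (rows 2 and 3) into rows 0 and 1 and parks the old
 * top half in rows 2 and 3.  Since a round only XORs rows 0 and 1 into the
 * state, the half macro computes just the incoming half, in place and with
 * no swap, and the round macros alternate which half they touch.  The check
 * below restates the full macro from internal-skinnyutil.h (assumed
 * unchanged by this patch) next to the half macro the patch adds:
 */
#include <stdint.h>
#include <stdio.h>

#define skinny128_permute_tk(tk) \
    do { \
        /* PT = [9, 15, 8, 13, 10, 14, 12, 11, 0, 1, 2, 3, 4, 5, 6, 7] */ \
        uint32_t row2 = tk[2]; \
        uint32_t row3 = tk[3]; \
        tk[2] = tk[0]; \
        tk[3] = tk[1]; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk[0] = ((row2 >> 8) & 0x000000FFU) | \
                ((row2 << 16) & 0x00FF0000U) | \
                ( row3 & 0xFF00FF00U); \
        tk[1] = ((row2 >> 16) & 0x000000FFU) | \
                 (row2 & 0xFF000000U) | \
                ((row3 << 8) & 0x0000FF00U) | \
                ( row3 & 0x00FF0000U); \
    } while (0)

#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        uint32_t row2 = tk2; \
        uint32_t row3 = tk3; \
        row3 = (row3 << 16) | (row3 >> 16); \
        tk2 = ((row2 >> 8) & 0x000000FFU) | \
              ((row2 << 16) & 0x00FF0000U) | \
              ( row3 & 0xFF00FF00U); \
        tk3 = ((row2 >> 16) & 0x000000FFU) | \
              (row2 & 0xFF000000U) | \
              ((row3 << 8) & 0x0000FF00U) | \
              ( row3 & 0x00FF0000U); \
    } while (0)

int main(void)
{
    uint32_t tk[4] = {0x03020100U, 0x07060504U, 0x0B0A0908U, 0x0F0E0D0CU};
    uint32_t lo = tk[2], hi = tk[3];
    skinny128_permute_tk(tk);          /* full permutation, with swap */
    skinny128_permute_tk_half(lo, hi); /* bottom half only, no swap */
    if (lo == tk[0] && hi == tk[1])
        printf("half permutation matches rows 0-1 of the full one\n");
    else
        printf("mismatch!\n");
    return 0;
}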
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
     }
 
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
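
Editor's aside (not part of the patch): the inverse rounds above step the 6-bit round-constant generator backwards with rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20). A minimal standalone C99 check, using nothing beyond the two update expressions copied from the macros, confirms that the backward step undoes the forward step for all 64 possible states:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned v;
        for (v = 0; v < 64; ++v) {
            uint8_t rc = (uint8_t)v;
            /* Forward update, as in skinny_128_256_round_tk_full() */
            rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
            rc &= 0x3F;
            /* Backward update, as in skinny_128_256_inv_round_tk_full() */
            rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
            assert(rc == v);   /* round-trips for every 6-bit state */
        }
        return 0;
    }

This is why skinny_128_256_decrypt() can seed rc with a single precomputed constant (0x09) and run the generator in reverse, instead of storing per-round constants.
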
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusn1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
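
Editor's aside (not part of the patch): the inverse rounds above apply skinny128_LFSR3 to TK2 and skinny128_LFSR2 to TK3, the opposite pairing from encryption. That works because the two SKINNY-128 tweakey LFSRs are mutual inverses on each byte; the word-level macros simply run four bytes in parallel. A standalone sketch, using byte-level versions taken from the SKINNY specification (the lfsr2()/lfsr3() helper names are illustrative, not part of the library):

    #include <assert.h>
    #include <stdint.h>

    /* TK2 LFSR: x7..x0 -> x6..x0 || x7^x5 */
    static uint8_t lfsr2(uint8_t x)
    {
        return (uint8_t)((x << 1) | (((x >> 7) ^ (x >> 5)) & 0x01));
    }

    /* TK3 LFSR: x7..x0 -> x0^x6 || x7..x1 */
    static uint8_t lfsr3(uint8_t x)
    {
        return (uint8_t)((x >> 1) | (((x << 7) ^ (x << 1)) & 0x80));
    }

    int main(void)
    {
        unsigned v;
        for (v = 0; v < 256; ++v) {
            /* Each LFSR undoes the other, in both orders */
            assert(lfsr3(lfsr2((uint8_t)v)) == (uint8_t)v);
            assert(lfsr2(lfsr3((uint8_t)v)) == (uint8_t)v);
        }
        return 0;
    }

The same fact underlies the fast-forwarding loop here: stepping every byte of TK2 and TK3 forward by ROUNDS/2 LFSR applications up front lets decryption unwind them one inverse application at a time.
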
+        /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */
         skinny128_LFSR2(TK2[0]);
         skinny128_LFSR2(TK2[1]);
         skinny128_LFSR2(TK2[2]);
@@ -263,50 +420,20 @@ void skinny_128_384_decrypt
     }
 #endif
 
-    /* Perform all decryption rounds */
-    for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) {
-        /* Inverse permutation on TK1 for this round */
-        skinny128_inv_permute_tk(TK1);
+    /* Perform all decryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_inv_permute_tk(TK2);
-        skinny128_inv_permute_tk(TK3);
-        skinny128_LFSR3(TK2[2]);
-        skinny128_LFSR3(TK2[3]);
-        skinny128_LFSR2(TK3[2]);
-        skinny128_LFSR2(TK3[3]);
-#endif
-
-        /* Inverse mix of the columns */
-        temp = s3;
-        s3 = s0;
-        s0 = s1;
-        s1 = s2;
-        s3 ^= temp;
-        s2 = temp ^ s0;
-        s1 ^= s2;
-
-        /* Inverse shift of the rows */
-        s1 = leftRotate24(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate8(s3);
-
-        /* Apply the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
-        s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4);
+        skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1);
+        skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0);
+        skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1);
+        skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0);
 #else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-        schedule -= 2;
+        skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+        skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+        skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+        skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+        schedule -= 8;
 #endif
-        s2 ^= 0x02;
-
-        /* Apply the inverse of the S-box to all bytes in the state */
-        skinny128_inv_sbox(s0);
-        skinny128_inv_sbox(s1);
-        skinny128_inv_sbox(s2);
-        skinny128_inv_sbox(s3);
     }
 
     /* Pack the result into the output buffer */
@@ -316,6 +443,57 @@ void skinny_128_384_decrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset)
+ * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2().
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state.
+ * \param s2 Third word of the state.
+ * \param s3 Fourth word of the state.
+ * \param half 0 for the bottom half and 1 for the top half of the TK values.
+ * \param offset Offset between 0 and 3 of the current unrolled round.
+ */
+#if SKINNY_128_SMALL_SCHEDULE
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+    skinny_128_384_round_tk_full(s0, s1, s2, s3, half)
+#else /* !SKINNY_128_SMALL_SCHEDULE */
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \
+        s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \
+              TK2[half * 2 + 1]; \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
    unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
    }
 
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
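
Editor's aside (not part of the patch): the unrolled macros drop the old temp-based word rotation after MixColumns. Each round now performs only the three in-place XORs, and the rotation is realized by calling the next round with the state words rotated one position, e.g. skinny_128_256_round_tk_full(s3, s0, s1, s2, 1) after (s0, s1, s2, s3, 0). A standalone check of that equivalence on arbitrary words:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t s0 = 0x01234567U, s1 = 0x89ABCDEFU;
        uint32_t s2 = 0xFEDCBA98U, s3 = 0x76543210U;
        uint32_t a0 = s0, a1 = s1, a2 = s2, a3 = s3, temp;
        uint32_t b0 = s0, b1 = s1, b2 = s2, b3 = s3;

        /* Old form: mix the columns, then rotate the words via temp */
        a1 ^= a2;
        a2 ^= a0;
        temp = a3 ^ a2;
        a3 = a2;
        a2 = a1;
        a1 = a0;
        a0 = temp;

        /* New form: mix in place, don't rotate the words */
        b1 ^= b2;
        b2 ^= b0;
        b3 ^= b2;

        /* Reading the new state in the order (b3, b0, b1, b2), as the
         * next unrolled call does, matches the old rotated state */
        assert(a0 == b3 && a1 == b0 && a2 == b1 && a3 == b2);
        return 0;
    }

Four such calls return the words to their original positions, which is why the round loops now advance by 4 and the round counts (48 and 56) divide evenly.
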
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusn2/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinny128.c b/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinny128.c +++ b/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
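
Editor's aside (not part of the patch): the half permutations let consecutive unrolled rounds skip the row swap of the full tweakey permutation. Permuting rows 2/3 in place (for half = 0) and then rows 0/1 (for half = 1) leaves the array exactly as two applications of the full cell permutation PT would. A standalone sketch, assuming the usual cell layout where cell i sits in byte (i % 4) of word (i / 4), matching the le_load_word32() unpacking, and reusing the macro body added to internal-skinnyutil.h:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define skinny128_permute_tk_half(tk2, tk3) \
        do { \
            uint32_t row2 = tk2; \
            uint32_t row3 = tk3; \
            row3 = (row3 << 16) | (row3 >> 16); \
            tk2 = ((row2 >>  8) & 0x000000FFU) | \
                  ((row2 << 16) & 0x00FF0000U) | \
                  ( row3        & 0xFF00FF00U); \
            tk3 = ((row2 >> 16) & 0x000000FFU) | \
                   (row2        & 0xFF000000U) | \
                  ((row3 <<  8) & 0x0000FF00U) | \
                  ( row3        & 0x00FF0000U); \
        } while (0)

    /* Forward tweakey cell permutation from the SKINNY specification */
    static const uint8_t PT[16] = {9, 15, 8, 13, 10, 14, 12, 11,
                                   0, 1, 2, 3, 4, 5, 6, 7};

    int main(void)
    {
        uint8_t cells[16], ref[16], tmp[16];
        uint32_t tk[4];
        int i, pass;

        for (i = 0; i < 16; ++i)
            cells[i] = (uint8_t)(i * 17 + 1);     /* arbitrary pattern */

        /* Reference: apply the cell permutation twice */
        memcpy(ref, cells, 16);
        for (pass = 0; pass < 2; ++pass) {
            memcpy(tmp, ref, 16);
            for (i = 0; i < 16; ++i)
                ref[i] = tmp[PT[i]];
        }

        /* Word form: bottom half first, then top half, in the order
         * two consecutive unrolled rounds issue them */
        for (i = 0; i < 4; ++i)
            tk[i] = (uint32_t)cells[i * 4] |
                    ((uint32_t)cells[i * 4 + 1] << 8) |
                    ((uint32_t)cells[i * 4 + 2] << 16) |
                    ((uint32_t)cells[i * 4 + 3] << 24);
        skinny128_permute_tk_half(tk[2], tk[3]);
        skinny128_permute_tk_half(tk[0], tk[1]);

        for (i = 0; i < 16; ++i)
            assert((uint8_t)(tk[i / 4] >> ((i % 4) * 8)) == ref[i]);
        return 0;
    }

The inverse macros play the same trick with PT', which is why the decryption loop below alternates half = 1 and half = 0 in the opposite order from encryption.
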
+        /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */
         skinny128_LFSR2(TK2[0]);
         skinny128_LFSR2(TK2[1]);
         skinny128_LFSR2(TK2[2]);
@@ -263,50 +420,20 @@ void skinny_128_384_decrypt
     }
 #endif
 
-    /* Perform all decryption rounds */
-    for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) {
-        /* Inverse permutation on TK1 for this round */
-        skinny128_inv_permute_tk(TK1);
+    /* Perform all decryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_inv_permute_tk(TK2);
-        skinny128_inv_permute_tk(TK3);
-        skinny128_LFSR3(TK2[2]);
-        skinny128_LFSR3(TK2[3]);
-        skinny128_LFSR2(TK3[2]);
-        skinny128_LFSR2(TK3[3]);
-#endif
-
-        /* Inverse mix of the columns */
-        temp = s3;
-        s3 = s0;
-        s0 = s1;
-        s1 = s2;
-        s3 ^= temp;
-        s2 = temp ^ s0;
-        s1 ^= s2;
-
-        /* Inverse shift of the rows */
-        s1 = leftRotate24(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate8(s3);
-
-        /* Apply the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
-        s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4);
+        skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1);
+        skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0);
+        skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1);
+        skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0);
 #else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-        schedule -= 2;
+        skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+        skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+        skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+        skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+        schedule -= 8;
 #endif
-        s2 ^= 0x02;
-
-        /* Apply the inverse of the S-box to all bytes in the state */
-        skinny128_inv_sbox(s0);
-        skinny128_inv_sbox(s1);
-        skinny128_inv_sbox(s2);
-        skinny128_inv_sbox(s3);
     }
 
     /* Pack the result into the output buffer */
@@ -316,6 +443,57 @@ void skinny_128_384_decrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset)
+ * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2().
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state.
+ * \param s2 Third word of the state.
+ * \param s3 Fourth word of the state.
+ * \param half 0 for the bottom half and 1 for the top half of the TK values.
+ * \param offset Offset between 0 and 3 of the current unrolled round.
+ */
+#if SKINNY_128_SMALL_SCHEDULE
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+    skinny_128_384_round_tk_full(s0, s1, s2, s3, half)
+#else /* !SKINNY_128_SMALL_SCHEDULE */
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \
+        s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \
+              TK2[half * 2 + 1]; \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
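+ *
+ * \remarks Unlike its Skinny-128-384 counterpart, this macro only has TK1
+ * and TK2 to update; the 256-bit tweakey schedule has no TK3 half to permute.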
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
     }
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
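The new starting position for the schedule pointer a few lines up pairs with
the offsets 3..0 in the unrolled calls and the schedule -= 8 step: each group
of four inverse rounds reads exactly the same schedule words, in the same
order, as four iterations of the old schedule -= 2 loop. A standalone
self-check of that index arithmetic (a sketch, not library code):

#include <assert.h>

static void check_schedule_walk(int rounds)
{
    int base = rounds * 2 - 8;    /* unrolled loop: &ks->k[ROUNDS * 2 - 8] */
    int rolled = rounds * 2 - 2;  /* original loop: &ks->k[ROUNDS * 2 - 2] */
    for (int round = 0; round < rounds; round += 4) {
        for (int offset = 3; offset >= 0; --offset) {
            /* schedule[offset * 2] lands on the old loop's current word */
            assert(base + offset * 2 == rolled);
            rolled -= 2;
        }
        base -= 8;                /* schedule -= 8 */
    }
}

/* check_schedule_walk(48) covers Skinny-128-256; 56 covers Skinny-128-384. */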
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinnyutil.h b/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinnyutil.h +++ b/romulus/Implementations/crypto_aead/romulusn3/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin-avr.S b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin-avr.S new file mode 100644 index 0000000..f20ce72 --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin-avr.S @@ -0,0 +1,6365 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 384 +table_0: + .byte 194 + .byte 38 + .byte 176 + .byte 78 + .byte 3 + .byte 83 + .byte 89 + .byte 144 + .byte 50 + .byte 230 + .byte 143 + .byte 170 + .byte 146 + .byte 138 + .byte 146 + .byte 254 + .byte 25 + .byte 164 + .byte 21 + .byte 65 + .byte 50 + .byte 149 + .byte 83 + .byte 147 + .byte 78 + .byte 204 + .byte 177 + .byte 93 + .byte 202 + .byte 21 + .byte 21 + .byte 84 + .byte 168 + .byte 85 + .byte 31 + .byte 189 + .byte 13 + .byte 26 + .byte 110 + .byte 90 + .byte 181 + .byte 38 + .byte 69 + .byte 78 + .byte 240 + .byte 95 + .byte 86 + .byte 163 + .byte 216 + .byte 32 + .byte 143 + .byte 15 + .byte 225 + .byte 190 + .byte 84 + .byte 11 + .byte 157 + .byte 108 + .byte 26 + .byte 125 + .byte 10 + .byte 40 + .byte 166 + .byte 23 + .byte 134 + .byte 201 + .byte 70 + .byte 170 + .byte 98 + .byte 144 + .byte 25 + .byte 193 + .byte 222 + .byte 92 + .byte 44 + .byte 24 + .byte 254 + .byte 83 + .byte 13 + .byte 160 + .byte 152 + .byte 38 + .byte 22 + .byte 78 + .byte 161 + .byte 91 + .byte 83 + .byte 178 + .byte 101 + .byte 157 + .byte 143 + .byte 108 + .byte 48 + .byte 173 + .byte 22 + .byte 88 + .byte 250 + .byte 212 + .byte 31 + .byte 105 + .byte 249 + .byte 188 + .byte 245 + .byte 107 + .byte 37 + .byte 53 + .byte 235 + .byte 248 + .byte 250 + .byte 236 + .byte 29 + .byte 178 + .byte 23 + .byte 164 + .byte 61 + .byte 123 + .byte 180 + .byte 148 + .byte 44 + .byte 246 + .byte 91 + .byte 38 + .byte 175 + .byte 79 + .byte 22 + .byte 70 + .byte 72 + .byte 197 + .byte 33 + .byte 173 + .byte 220 + .byte 69 + .byte 7 + .byte 214 + .byte 139 + .byte 224 + .byte 184 + .byte 253 + .byte 4 + .byte 5 + .byte 87 + .byte 82 + .byte 31 + .byte 30 + .byte 22 + .byte 194 + .byte 251 + .byte 69 + .byte 31 + .byte 155 + .byte 82 + .byte 235 + .byte 50 + .byte 78 + .byte 25 + .byte 82 + .byte 24 + .byte 192 + .byte 152 + .byte 84 + .byte 118 + .byte 38 + .byte 252 + .byte 79 + .byte 71 + .byte 66 + .byte 77 + .byte 212 + .byte 156 + .byte 16 + .byte 220 + .byte 38 + .byte 214 + .byte 197 + .byte 201 + .byte 179 + .byte 223 + .byte 69 + .byte 1 + .byte 17 + .byte 164 + .byte 198 + .byte 76 + .byte 98 + .byte 181 + .byte 62 + .byte 86 + .byte 23 + .byte 135 + .byte 231 + .byte 86 + .byte 152 + .byte 251 + .byte 182 + .byte 8 + .byte 49 + .byte 82 + .byte 7 + .byte 185 + .byte 2 + .byte 1 + .byte 38 + .byte 9 + .byte 79 + .byte 180 + .byte 78 + .byte 66 + .byte 231 + .byte 118 + .byte 214 + .byte 220 + .byte 131 + .byte 165 + .byte 241 + .byte 15 + .byte 70 + .byte 91 + .byte 141 + .byte 14 + .byte 45 + .byte 156 + .byte 123 + .byte 185 + .byte 230 + .byte 125 + .byte 59 + .byte 161 + .byte 224 + .byte 47 + .byte 98 + .byte 90 + .byte 13 + .byte 141 + .byte 191 + .byte 59 + .byte 148 + .byte 161 + .byte 78 + .byte 218 + .byte 248 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 12 + .byte 24 + .byte 186 + 
.byte 63 + .byte 171 + .byte 185 + .byte 58 + .byte 86 + .byte 239 + .byte 165 + .byte 94 + .byte 18 + .byte 108 + .byte 162 + .byte 157 + .byte 133 + .byte 155 + .byte 119 + .byte 207 + .byte 184 + .byte 147 + .byte 231 + .byte 77 + .byte 125 + .byte 159 + .byte 180 + .byte 239 + .byte 7 + .byte 6 + .byte 83 + .byte 82 + .byte 141 + .byte 171 + .byte 230 + .byte 8 + .byte 30 + .byte 135 + .byte 159 + .byte 114 + .byte 65 + .byte 10 + .byte 239 + .byte 74 + .byte 140 + .byte 167 + .byte 201 + .byte 160 + .byte 74 + .byte 239 + .byte 149 + .byte 58 + .byte 217 + .byte 175 + .byte 210 + .byte 0 + .byte 187 + .byte 240 + .byte 91 + .byte 44 + .byte 182 + .byte 216 + .byte 148 + .byte 109 + .byte 56 + .byte 167 + .byte 25 + .byte 155 + .byte 60 + .byte 148 + .byte 134 + .byte 9 + .byte 169 + .byte 218 + .byte 120 + .byte 248 + .byte 35 + .byte 211 + .byte 71 + .byte 182 + .byte 167 + .byte 120 + .byte 157 + .byte 252 + .byte 116 + .byte 17 + .byte 174 + .byte 202 + .byte 234 + .byte 119 + .byte 166 + .byte 49 + .byte 47 + .byte 84 + .byte 192 + .byte 200 + .byte 76 + .byte 5 + .byte 202 + .byte 81 + .byte 47 + .byte 149 + .byte 241 + .byte 104 + .byte 82 + .byte 43 + .byte 138 + .byte 91 + .byte 79 + .byte 172 + .byte 180 + .byte 20 + .byte 246 + .byte 1 + .byte 84 + .byte 217 + .byte 241 + .byte 104 + .byte 37 + .byte 77 + .byte 118 + .byte 17 + .byte 54 + .byte 73 + .byte 106 + .byte 62 + .byte 156 + .byte 239 + .byte 142 + + .text +.global saturnin_setup_key + .type saturnin_setup_key, @function +saturnin_setup_key: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r18,X+ + ld r19,X+ + st Z,r18 + std Z+1,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+32,r18 + std Z+33,r19 + ld r18,X+ + ld r19,X+ + std Z+4,r18 + std Z+5,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+36,r18 + std Z+37,r19 + ld r18,X+ + ld r19,X+ + std Z+8,r18 + std Z+9,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+40,r18 + std Z+41,r19 + ld r18,X+ + ld r19,X+ + std Z+12,r18 + std Z+13,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+44,r18 + std Z+45,r19 + ld r18,X+ + ld r19,X+ + std Z+16,r18 + std Z+17,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+48,r18 + std Z+49,r19 + ld r18,X+ + ld r19,X+ + std Z+20,r18 + std Z+21,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+52,r18 + std Z+53,r19 + ld r18,X+ + ld r19,X+ + std Z+24,r18 + std Z+25,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+56,r18 + std Z+57,r19 + ld r18,X+ + ld r19,X+ + std Z+28,r18 + std Z+29,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+60,r18 + std Z+61,r19 + ld r18,X+ + ld r19,X+ + std Z+2,r18 + std Z+3,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc 
r18,r1 + std Z+34,r18 + std Z+35,r19 + ld r18,X+ + ld r19,X+ + std Z+6,r18 + std Z+7,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+38,r18 + std Z+39,r19 + ld r18,X+ + ld r19,X+ + std Z+10,r18 + std Z+11,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+42,r18 + std Z+43,r19 + ld r18,X+ + ld r19,X+ + std Z+14,r18 + std Z+15,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+46,r18 + std Z+47,r19 + ld r18,X+ + ld r19,X+ + std Z+18,r18 + std Z+19,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+50,r18 + std Z+51,r19 + ld r18,X+ + ld r19,X+ + std Z+22,r18 + std Z+23,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+54,r18 + std Z+55,r19 + ld r18,X+ + ld r19,X+ + std Z+26,r18 + std Z+27,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+58,r18 + std Z+59,r19 + ld r18,X+ + ld r19,X+ + std Z+30,r18 + std Z+31,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+62,r18 + std Z+63,r19 + ret + .size saturnin_setup_key, .-saturnin_setup_key + + .text +.global saturnin_encrypt_block + .type saturnin_encrypt_block, @function +saturnin_encrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ld r6,X+ + ld r7,X+ + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ld r10,X+ + ld r11,X+ + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ld r14,X+ + ld r15,X+ + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ld r8,X+ + ld r9,X+ + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ld r12,X+ + ld r13,X+ + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ld r24,X+ + ld r25,X+ + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor 
r21,r0 + std Y+31,r20 + std Y+32,r21 + ldi r16,5 + cpi r18,60 + cpc r19,r1 + brcs 120f + ldi r16,8 + ldi r17,4 + add r18,r17 + adc r19,r1 +120: + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1447f +126: + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 
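/* The mov/and/eor and mov/or/eor groups through this stretch each compute a
 * three-operand x ^= y & z or x ^= y | z, one byte at a time, over the four
 * 32-bit bitsliced words held in (r2..r5), (r6..r9), (r10..r13) and
 * (r14,r15,r24,r25).  The ladder they expand is, in C (the names a..d for
 * the four words are editorial, not from the generator):
 *
 *     a ^= b & c;
 *     b ^= a | d;
 *     d ^= b | c;
 *     c ^= b & d;
 *     b ^= a | c;
 *     a ^= b | d;
 *
 * Later copies of the same ladder bind a..d to different register groups. */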
+ eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + eor r20,r10 + eor r21,r11 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + eor r20,r12 + eor r21,r13 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + 
ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + 
std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + movw r20,r10 + movw r22,r12 + movw r10,r6 + movw r12,r8 + movw r6,r14 + movw r8,r24 + movw r14,r2 + movw r24,r4 + movw r2,r20 + movw r4,r22 + eor r2,r10 + eor r3,r11 + eor r4,r12 + eor r5,r13 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + eor r20,r2 + eor r21,r3 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + eor r20,r4 + eor r21,r5 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi 
r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld 
r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ret +1447: + rcall 126b + rcall 1453f + dec r16 + brne 1447b + rjmp 2622f +1453: + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + movw r20,r2 + movw r22,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r20 + movw r24,r22 + eor r14,r2 + eor r15,r3 + eor r24,r4 + eor r25,r5 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd 
r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + std Y+5,r22 + std Y+6,r23 + eor r20,r14 + eor r21,r15 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + std Y+7,r22 + std Y+8,r23 + eor r20,r24 + eor r21,r25 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 
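/* The surrounding lsl/rol/adc triples are 16-bit rotations: "lsl lo; rol hi;
 * adc lo,r1" rotates the pair hi:lo left by one bit, since r1 is AVR's fixed
 * zero register and the adc feeds the carry (the bit shifted out of hi) back
 * into bit 0.  A "mov r0,hi; mov hi,lo; mov lo,r0" byte swap contributes a
 * further rotation by 8, so each sequence rotates left by the number of
 * triples, plus 8 if the bytes were swapped first.  In C terms, for a 16-bit
 * word x: (uint16_t)((x << n) | (x >> (16 - n))). */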
+ adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + 
ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+9,r22 + std Y+10,r23 + eor r20,r10 + eor r21,r11 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+11,r22 + std Y+12,r23 + eor r20,r12 + eor r21,r13 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif 
defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor r13,r0 + std Y+17,r14 + std Y+18,r15 + std Y+19,r24 + std Y+20,r25 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r2 + std Y+26,r3 + std Y+27,r4 + std Y+28,r5 + std Y+29,r10 + std Y+30,r11 + std Y+31,r12 + std Y+32,r13 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 + ret +2622: + ldd r26,Y+33 + ldd r27,Y+34 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_encrypt_block, .-saturnin_encrypt_block + + .text +.global saturnin_decrypt_block + .type saturnin_decrypt_block, @function +saturnin_decrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r6,X+ + ld r7,X+ + ld r10,X+ + ld r11,X+ + ld r14,X+ + ld r15,X+ + ld r20,X+ + ld r21,X+ + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + 
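; only half of the 32-byte state fits in registers, so these input words are parked in the Y frame until the rounds need them +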
std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ld r8,X+ + ld r9,X+ + ld r12,X+ + ld r13,X+ + ld r24,X+ + ld r25,X+ + ld r20,X+ + ld r21,X+ + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + std Y+31,r20 + std Y+32,r21 + ldi r16,10 + cpi r18,60 + cpc r19,r1 + brcs 56f + ldi r16,16 + ldi r17,4 + add r18,r17 + adc r19,r1 +56: + add r18,r16 + adc r19,r1 + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1233f +64: + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r14,Y+17 + ldd r15,Y+18 + ldd r24,Y+19 + ldd r25,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r2,Y+25 + ldd r3,Y+26 + ldd r4,Y+27 + ldd r5,Y+28 + ldd r10,Y+29 + ldd r11,Y+30 + ldd r12,Y+31 + ldd r13,Y+32 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor r13,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc 
r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + eor r20,r10 + eor r21,r11 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + eor r20,r12 + eor r21,r13 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + 
std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor 
r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + eor r20,r14 + eor r21,r15 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + eor r20,r24 + eor r21,r25 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + movw r20,r14 + movw r22,r24 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r20 + movw r4,r22 + eor r2,r6 + eor r3,r7 + eor r4,r8 + eor r5,r9 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor 
r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + ret +1233: + rcall 64b + rcall 1239f + subi r16,2 + brne 1233b + rjmp 2560f +1239: + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm 
+ mov r20,r0 +#endif + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + 
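; XOR the state words held on the Y frame into the register half of the state +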
ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+5 + ldd r21,Y+6 + eor r20,r2 + eor r21,r3 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + eor r20,r4 + eor r21,r5 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r2 + movw r22,r4 + movw r2,r14 + movw r4,r24 + movw r14,r6 + movw r24,r8 + movw r6,r10 + movw r8,r12 + movw r10,r20 + movw r12,r22 + eor r10,r6 + eor r11,r7 + eor r12,r8 + eor r13,r9 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 
+ or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor 
r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + eor r20,r10 + eor r21,r11 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + eor r20,r12 + eor r21,r13 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + 
std Y+4,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ret +2560: + ldd r26,Y+33 + ldd r27,Y+34 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + ldd 
r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_decrypt_block, .-saturnin_decrypt_block + +#endif diff --git a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.c b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.c new file mode 100644 index 0000000..f4be50d --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.c @@ -0,0 +1,483 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "internal-saturnin.h" + +#if !defined(__AVR__) + +/* Round constants for various combinations of rounds and domain_sep */ +static uint32_t const saturnin_rc[] = { + /* RC_10_1 */ + 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, + 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d, + /* RC_10_2 */ + 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, + 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe, + /* RC_10_3 */ + 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, + 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4, + /* RC_10_4 */ + 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, + 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018, + /* RC_10_5 */ + 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, + 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752, + /* RC_10_6 */ + 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, + 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1, + /* RC_16_7 */ + 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, + 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, + 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, + 0x386d94d8, + /* RC_16_8 */ + 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, + 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, + 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, + 0x8eef9c3e +}; + +/* Loads a 32-bit word from the two halves of a 256-bit Saturnin input block */ +#define saturnin_load_word32(ptr) \ + ((((uint32_t)((ptr)[17])) << 24) | \ + (((uint32_t)((ptr)[16])) << 16) | \ + (((uint32_t)((ptr)[1])) << 8) | \ + ((uint32_t)((ptr)[0]))) + +/* Stores a 32-bit word to the two halves of a 256-bit Saturnin output block */ +#define saturnin_store_word32(ptr, x) \ + do { \ + (ptr)[0] = (uint8_t)(x); \ + (ptr)[1] = (uint8_t)((x) >> 8); \ + (ptr)[16] = (uint8_t)((x) >> 16); \ + (ptr)[17] = (uint8_t)((x) >> 24); \ + } while (0) + +/* Rotate the 4-bit nibbles within a 16-bit word left */ +#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ + do { \ + (a) = (((a) & (mask1)) << (bits1)) | \ + (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ + (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ + (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ + } while (0) + +/* Rotate 16-bit subwords left */ +#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ + do { \ + (a) = (((a) & (mask1)) << (bits1)) | \ + (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ + (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ + (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ + } while (0) + +/** + * \brief XOR the key into the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= ks->k[0]; \ + x1 ^= ks->k[1]; \ + x2 ^= ks->k[2]; \ + x3 ^= ks->k[3]; \ + x4 ^= ks->k[4]; \ + x5 ^= ks->k[5]; \ + x6 ^= ks->k[6]; \ + x7 ^= ks->k[7]; \ + } while (0) + +/** + * \brief XOR a rotated version of the key into the Saturnin state. 
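+ * The rotated words ks->k[8..15] are precomputed by saturnin_setup_key:
+ * each 16-bit half of ks->k[0..7] rotated left by 11 bit positions.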
+ * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_xor_key_rotated(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= ks->k[8]; \ + x1 ^= ks->k[9]; \ + x2 ^= ks->k[10]; \ + x3 ^= ks->k[11]; \ + x4 ^= ks->k[12]; \ + x5 ^= ks->k[13]; \ + x6 ^= ks->k[14]; \ + x7 ^= ks->k[15]; \ + } while (0) + +/** + * \brief Applies the Saturnin S-box to a bit-sliced set of nibbles. + * + * \param a First bit-slice. + * \param b Second bit-slice. + * \param c Third bit-slice. + * \param d Fourth bit-slice. + * + * The S-box also involves a rotation on the output words. We perform the + * rotation implicitly in the higher layers. + */ +#define saturnin_sbox(a, b, c, d) \ + do { \ + (a) ^= (b) & (c); \ + (b) ^= (a) | (d); \ + (d) ^= (b) | (c); \ + (c) ^= (b) & (d); \ + (b) ^= (a) | (c); \ + (a) ^= (b) | (d); \ + } while (0) + +/** + * \brief Applies the inverse of the Saturnin S-box to a set of nibbles. + * + * \param a First bit-slice. + * \param b Second bit-slice. + * \param c Third bit-slice. + * \param d Fourth bit-slice. + * + * The inverse of the S-box also involves a rotation on the input words. + * We perform the rotation implicitly in the higher layers. + */ +#define saturnin_sbox_inverse(a, b, c, d) \ + do { \ + (a) ^= (b) | (d); \ + (b) ^= (a) | (c); \ + (c) ^= (b) & (d); \ + (d) ^= (b) | (c); \ + (b) ^= (a) | (d); \ + (a) ^= (b) & (c); \ + } while (0) + +/* Helpers for MDS matrix operations */ +#define SWAP(a) (((a) << 16) | ((a) >> 16)) +#define MUL(x0, x1, x2, x3) \ + do { \ + temp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = temp ^ x0; \ + } while (0) +#define MULINV(x0, x1, x2, x3) \ + do { \ + temp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ temp; \ + } while (0) + +/** + * \brief Applies the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_mds(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MUL(x4, x5, x6, x7); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + MUL(x0, x1, x2, x3); \ + MUL(x0, x1, x2, x3); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + } while (0) + +/** + * \brief Applies the inverse of the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. 
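+ *
+ * Undoes saturnin_mds by applying its XOR and SWAP steps in reverse
+ * order, with MULINV substituted for MUL.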
+ */ +#define saturnin_mds_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MULINV(x0, x1, x2, x3); \ + MULINV(x0, x1, x2, x3); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + MULINV(x4, x5, x6, x7); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + } while (0) + +/** + * \brief Applies the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x5, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x6, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x7, 0x7777U, 1, 0x1111, 3); \ + } while (0) + +/** + * \brief Applies the inverse of the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x5, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x6, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x7, 0x1111U, 3, 0x7777, 1); \ + } while (0) + +/** + * \brief Applies the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x5, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x6, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x7, 0x0FFFU, 4, 0x000F, 12); \ + } while (0) + +/** + * \brief Applies the inverse of the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. 
+ * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x5, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x6, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x7, 0x000FU, 12, 0x0FFF, 4); \ + } while (0) + +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key) +{ + int index; + uint32_t temp; + for (index = 0; index < 16; index += 2) { + temp = saturnin_load_word32(key + index); + ks->k[index / 2] = temp; + ks->k[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | + ((temp >> 5) & 0x07FF07FFU); + } +} + +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 8 : 5; + const uint32_t *rc = saturnin_rc + domain; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Perform all encryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc += 2) { + /* Even rounds */ + saturnin_sbox(x0, x1, x2, x3); + saturnin_sbox(x4, x5, x6, x7); + saturnin_mds(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox(x1, x2, x3, x0); + saturnin_sbox(x7, x5, x4, x6); + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + + /* Odd rounds */ + saturnin_sbox(x2, x3, x0, x1); + saturnin_sbox(x6, x5, x7, x4); + saturnin_mds(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox(x3, x0, x1, x2); + saturnin_sbox(x4, x5, x6, x7); + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + + /* Correct the rotation of the second half before the next round */ + temp = x4; + x4 = x7; + x7 = x6; + x6 = temp; + } + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
8 : 5; + const uint32_t *rc = saturnin_rc + domain + (rounds - 1) * 2; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* Perform all decryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc -= 2) { + /* Correct the rotation of the second half before the next round */ + temp = x6; + x6 = x7; + x7 = x4; + x4 = temp; + + /* Odd rounds */ + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sbox_inverse(x3, x0, x1, x2); + saturnin_sbox_inverse(x4, x5, x6, x7); + saturnin_mds_inverse(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox_inverse(x2, x3, x0, x1); + saturnin_sbox_inverse(x6, x5, x7, x4); + + /* Even rounds */ + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_sbox_inverse(x1, x2, x3, x0); + saturnin_sbox_inverse(x7, x5, x4, x6); + saturnin_mds_inverse(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox_inverse(x0, x1, x2, x3); + saturnin_sbox_inverse(x4, x5, x6, x7); + } + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +#endif /* !__AVR__ */ diff --git a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.h b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.h new file mode 100644 index 0000000..8af07c3 --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/internal-saturnin.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LW_INTERNAL_SATURNIN_H +#define LW_INTERNAL_SATURNIN_H + +/** + * \file internal-saturnin.h + * \brief Saturnin block cipher. + * + * References: https://project.inria.fr/saturnin/ + */ + +#include "internal-util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Size of a Saturnin block in bytes. + */ +#define SATURNIN_BLOCK_SIZE 32 + +/** + * \brief Domain separator index 1 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_1 0 + +/** + * \brief Domain separator index 2 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_2 10 + +/** + * \brief Domain separator index 3 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_3 20 + +/** + * \brief Domain separator index 4 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_4 30 + +/** + * \brief Domain separator index 5 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_5 40 + +/** + * \brief Domain separator index 6 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_6 50 + +/** + * \brief Domain separator index 7 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_7 60 + +/** + * \brief Domain separator index 8 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_8 76 + +/** + * \brief Structure of the key schedule for Saturnin. + */ +typedef struct +{ + /** Pre-computed round keys for Saturnin */ + uint32_t k[16]; + +} saturnin_key_schedule_t; + +/** + * \brief Sets up a key schedule for Saturnin. + * + * \param ks Points to the key schedule to initialize. + * \param key Points to the 32 bytes of the key data. + */ +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key); + +/** + * \brief Encrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place encryption. + */ +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +/** + * \brief Decrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place decryption. 
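+ *
+ * A minimal usage sketch of this block cipher API (illustrative only:
+ * the key, pt, and ct arrays below are hypothetical caller-supplied
+ * buffers, and SATURNIN_DOMAIN_10_1 is just one valid domain value):
+ *
+ * \code
+ * saturnin_key_schedule_t ks;
+ * unsigned char key[32] = {0};                     // placeholder key
+ * unsigned char pt[SATURNIN_BLOCK_SIZE] = {0};     // placeholder input
+ * unsigned char ct[SATURNIN_BLOCK_SIZE];
+ * saturnin_setup_key(&ks, key);
+ * saturnin_encrypt_block(&ks, ct, pt, SATURNIN_DOMAIN_10_1);
+ * saturnin_decrypt_block(&ks, pt, ct, SATURNIN_DOMAIN_10_1);
+ * \endcode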
+ */
+void saturnin_decrypt_block
+    (const saturnin_key_schedule_t *ks, unsigned char *output,
+     const unsigned char *input, unsigned domain);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/saturnin.c b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/saturnin.c
index 734fc69..d2bd2cc 100644
--- a/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/saturnin.c
+++ b/saturnin/Implementations/crypto_aead/saturninctrcascadev2/rhys/saturnin.c
@@ -21,7 +21,7 @@
  */
 
 #include "saturnin.h"
-#include "internal-util.h"
+#include "internal-saturnin.h"
 #include <string.h>
 
 aead_cipher_t const saturnin_cipher = {
@@ -57,440 +57,22 @@ aead_hash_algorithm_t const saturnin_hash_algorithm = {
     0 /* squeeze */
 };
 
-/* Round constant tables for various combinations of rounds and domain_sep */
-static uint32_t const RC_10_1[] = {
-    0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419,
-    0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d
-};
-static uint32_t const RC_10_2[] = {
-    0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d,
-    0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe
-};
-static uint32_t const RC_10_3[] = {
-    0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa,
-    0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4
-};
-static uint32_t const RC_10_4[] = {
-    0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8,
-    0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018
-};
-static uint32_t const RC_10_5[] = {
-    0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df,
-    0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752
-};
-static uint32_t const RC_10_6[] = {
-    0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b,
-    0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1
-};
-static uint32_t const RC_16_7[] = {
-    0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b,
-    0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87,
-    0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0,
-    0x386d94d8
-};
-static uint32_t const RC_16_8[] = {
-    0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78,
-    0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195,
-    0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611,
-    0x8eef9c3e
-};
-
-/* Rotate the 4-bit nibbles within a 16-bit word left */
-#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \
-    do { \
-        uint32_t _temp = (a); \
-        (a) = ((_temp & (mask1)) << (bits1)) | \
-              ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \
-              ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \
-              ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \
-    } while (0)
-
-/* Rotate 16-bit subwords left */
-#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \
-    do { \
-        uint32_t _temp = (a); \
-        (a) = ((_temp & (mask1)) << (bits1)) | \
-              ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \
-              ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \
-              ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \
-    } while (0)
-
-/* XOR the SATURNIN state with the key */
-#define saturnin_xor_key() \
-    do { \
-        for (index = 0; index < 8; ++index) \
-            S[index] ^= K[index]; \
-    } while (0)
-
-/* XOR the SATURNIN state with a rotated version of the key */
-#define saturnin_xor_key_rotated() \
-    do { \
-        for (index = 0; index < 8; ++index) \
-            S[index] ^= K[index + 8]; \
-    } while (0)
-
-/* Apply an SBOX layer for SATURNIN - definition from the
specification */ -#define S_LAYER(a, b, c, d) \ - do { \ - (a) ^= (b) & (c); \ - (b) ^= (a) | (d); \ - (d) ^= (b) | (c); \ - (c) ^= (b) & (d); \ - (b) ^= (a) | (c); \ - (a) ^= (b) | (d); \ - } while (0) - -/* Apply an SBOX layer for SATURNIN in reverse */ -#define S_LAYER_INVERSE(a, b, c, d) \ - do { \ - (a) ^= (b) | (d); \ - (b) ^= (a) | (c); \ - (c) ^= (b) & (d); \ - (d) ^= (b) | (c); \ - (b) ^= (a) | (d); \ - (a) ^= (b) & (c); \ - } while (0) - -/** - * \brief Applies the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - a = S[0]; b = S[1]; c = S[2]; d = S[3]; - S_LAYER(a, b, c, d); - S[0] = b; S[1] = c; S[2] = d; S[3] = a; - - /* PI_1 on the second half of the state */ - a = S[4]; b = S[5]; c = S[6]; d = S[7]; - S_LAYER(a, b, c, d); - S[4] = d; S[5] = b; S[6] = a; S[7] = c; -} - -/** - * \brief Applies the inverse of the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox_inverse(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - b = S[0]; c = S[1]; d = S[2]; a = S[3]; - S_LAYER_INVERSE(a, b, c, d); - S[0] = a; S[1] = b; S[2] = c; S[3] = d; - - /* PI_1 on the second half of the state */ - d = S[4]; b = S[5]; a = S[6]; c = S[7]; - S_LAYER_INVERSE(a, b, c, d); - S[4] = a; S[5] = b; S[6] = c; S[7] = d; -} - -/** - * \brief Applies the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the MDS matrix to the state */ - #define SWAP(a) (((a) << 16) | ((a) >> 16)) - #define MUL(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = tmp ^ x0; \ - } while (0) - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MUL(x4, x5, x6, x7, tmp); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - MUL(x0, x1, x2, x3, tmp); - MUL(x0, x1, x2, x3, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the inverse of the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds_inverse(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the inverse of the MDS matrix to the state */ - #define MULINV(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ tmp; \ - } while (0) - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MULINV(x0, x1, x2, x3, tmp); - MULINV(x0, x1, x2, x3, tmp); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - MULINV(x4, x5, x6, x7, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the slice permutation to the SATURNIN state. 
- * - * \param S The state. - */ -static void saturnin_slice(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[5], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[6], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[7], 0x7777U, 1, 0x1111, 3); -} - -/** - * \brief Applies the inverse of the slice permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_slice_inverse(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[5], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[6], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[7], 0x1111U, 3, 0x7777, 1); -} - -/** - * \brief Applies the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[5], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[6], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[7], 0x0FFFU, 4, 0x000F, 12); -} - -/** - * \brief Applies the inverse of the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet_inverse(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[5], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[6], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[7], 0x000FU, 12, 0x0FFF, 4); -} - -/** - * \brief Encrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Ciphertext output block, 32 bytes. - * \param input Plaintext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_decrypt() - */ -static void saturnin_block_encrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Perform all encryption rounds */ - for (; rounds > 0; rounds -= 2, RC += 2) { - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_slice(S); - saturnin_mds(S); - saturnin_slice_inverse(S); - S[0] ^= RC[0]; - saturnin_xor_key_rotated(); - - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_sheet(S); - saturnin_mds(S); - saturnin_sheet_inverse(S); - S[0] ^= RC[1]; - saturnin_xor_key(); - } - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - -/** - * \brief Decrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Plaintext output block, 32 bytes. - * \param input Ciphertext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- *
- * \sa saturnin_block_encrypt()
- */
-static void saturnin_block_decrypt
-    (unsigned char *output, const unsigned char *input,
-     const unsigned char *key, unsigned rounds, const uint32_t *RC)
-{
-    uint32_t K[16];
-    uint32_t S[8];
-    uint32_t temp;
-    unsigned index;
-
-    /* Unpack the key and the input block */
-    for (index = 0; index < 16; index += 2) {
-        temp = ((uint32_t)(key[index])) |
-               (((uint32_t)(key[index + 1])) << 8) |
-               (((uint32_t)(key[index + 16])) << 16) |
-               (((uint32_t)(key[index + 17])) << 24);
-        K[index / 2] = temp;
-        K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) |
-                             ((temp >> 5) & 0x07FF07FFU);
-        S[index / 2] = ((uint32_t)(input[index])) |
-                       (((uint32_t)(input[index + 1])) << 8) |
-                       (((uint32_t)(input[index + 16])) << 16) |
-                       (((uint32_t)(input[index + 17])) << 24);
-    }
-
-    /* Perform all decryption rounds */
-    RC += rounds - 2;
-    for (; rounds > 0; rounds -= 2, RC -= 2) {
-        saturnin_xor_key();
-        S[0] ^= RC[1];
-        saturnin_sheet(S);
-        saturnin_mds_inverse(S);
-        saturnin_sheet_inverse(S);
-        saturnin_sbox_inverse(S);
-        saturnin_mds_inverse(S);
-        saturnin_sbox_inverse(S);
-
-        saturnin_xor_key_rotated();
-        S[0] ^= RC[0];
-        saturnin_slice(S);
-        saturnin_mds_inverse(S);
-        saturnin_slice_inverse(S);
-        saturnin_sbox_inverse(S);
-        saturnin_mds_inverse(S);
-        saturnin_sbox_inverse(S);
-    }
-
-    /* XOR the key into the state */
-    saturnin_xor_key();
-
-    /* Encode the state into the output block */
-    for (index = 0; index < 16; index += 2) {
-        temp = S[index / 2];
-        output[index] = (uint8_t)temp;
-        output[index + 1] = (uint8_t)(temp >> 8);
-        output[index + 16] = (uint8_t)(temp >> 16);
-        output[index + 17] = (uint8_t)(temp >> 24);
-    }
-}
-
 /**
  * \brief Encrypts a 256-bit block with the SATURNIN block cipher and
  * then XORs the result with the block to generate a new key.
  *
  * \param block Block to be encrypted and then XORed with the result.
  * \param key Points to the 32 byte key for the block cipher.
- * \param rounds Number of rounds to perform.
- * \param RC Round constants to use for domain separation.
+ * \param domain Domain separator and round counter.
  */
-void saturnin_block_encrypt_xor
-    (const unsigned char *block, unsigned char *key,
-     unsigned rounds, const uint32_t *RC)
+static void saturnin_block_encrypt_xor
+    (const unsigned char *block, unsigned char *key, unsigned domain)
 {
-    unsigned char temp[32];
-    saturnin_block_encrypt(temp, block, key, rounds, RC);
-    lw_xor_block_2_src(key, block, temp, 32);
+    saturnin_key_schedule_t ks;
+    unsigned char *temp = (unsigned char *)ks.k; /* Reuse some stack space */
+    saturnin_setup_key(&ks, key);
+    saturnin_encrypt_block(&ks, temp, block, domain);
+    lw_xor_block_2_src(key, block, temp, SATURNIN_BLOCK_SIZE);
 }
 
 /**
@@ -499,20 +81,20 @@ void saturnin_block_encrypt_xor
  * \param c Output ciphertext buffer.
  * \param m Input plaintext buffer.
  * \param mlen Length of the plaintext in bytes.
- * \param k Points to the 32-byte key.
+ * \param ks Points to the key schedule.
  * \param block Points to the pre-formatted nonce block.
  */
 static void saturnin_ctr_encrypt
     (unsigned char *c, const unsigned char *m, unsigned long long mlen,
-     const unsigned char *k, unsigned char *block)
+     const saturnin_key_schedule_t *ks, unsigned char *block)
 {
     /* Note: Specification requires a 95-bit counter but we only use 32-bit.
      * This limits the maximum packet size to 128GB.
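     * (As a quick check of that bound: a 32-bit counter allows 2^32 blocks
     * of 32 bytes each, i.e. 2^32 * 2^5 = 2^37 bytes = 128GB.)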
That should be OK */ uint32_t counter = 1; - unsigned char out[32]; + unsigned char out[SATURNIN_BLOCK_SIZE]; while (mlen >= 32) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, 32); c += 32; m += 32; @@ -521,7 +103,7 @@ static void saturnin_ctr_encrypt } if (mlen > 0) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, (unsigned)mlen); } } @@ -533,18 +115,17 @@ static void saturnin_ctr_encrypt * \param block Temporary block of 32 bytes from the caller. * \param m Points to the message to be authenticated. * \param mlen Length of the message to be authenticated in bytes. - * \param rounds Number of rounds to perform. - * \param RC1 Round constants to use for domain separation on full blocks. - * \param RC2 Round constants to use for domain separation on the last block. + * \param domain1 Round count and domain separator for full blocks. + * \param domain2 Round count and domain separator for the last block. */ static void saturnin_authenticate (unsigned char *tag, unsigned char *block, const unsigned char *m, unsigned long long mlen, - unsigned rounds, const uint32_t *RC1, const uint32_t *RC2) + unsigned domain1, unsigned domain2) { unsigned temp; while (mlen >= 32) { - saturnin_block_encrypt_xor(m, tag, rounds, RC1); + saturnin_block_encrypt_xor(m, tag, domain1); m += 32; mlen -= 32; } @@ -552,7 +133,7 @@ static void saturnin_authenticate memcpy(block, m, temp); block[temp] = 0x80; memset(block + temp + 1, 0, 31 - temp); - saturnin_block_encrypt_xor(block, tag, rounds, RC2); + saturnin_block_encrypt_xor(block, tag, domain2); } int saturnin_aead_encrypt @@ -563,6 +144,7 @@ int saturnin_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char *tag; (void)nsec; @@ -576,17 +158,20 @@ int saturnin_aead_encrypt memset(block + 17, 0, 15); /* Encrypt the plaintext in counter mode to produce the ciphertext */ - saturnin_ctr_encrypt(c, m, mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(c, m, mlen, &ks, block); /* Set the counter back to zero and then encrypt the nonce */ tag = c + mlen; memcpy(tag, k, 32); memset(block + 17, 0, 15); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, c, mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); return 0; } @@ -598,6 +183,7 @@ int saturnin_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char tag[32]; (void)nsec; @@ -614,17 +200,20 @@ int saturnin_aead_decrypt /* Encrypt the nonce to initialize the authentication phase */ memcpy(tag, k, 32); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, 
c, *mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, *mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); /* Decrypt the ciphertext in counter mode to produce the plaintext */ memcpy(block, npub, 16); block[16] = 0x80; memset(block + 17, 0, 15); - saturnin_ctr_encrypt(m, c, *mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(m, c, *mlen, &ks, block); /* Check the authentication tag at the end of the message */ return aead_check_tag @@ -639,6 +228,7 @@ int saturnin_short_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned temp; (void)nsec; @@ -656,7 +246,8 @@ int saturnin_short_aead_encrypt memset(block + 17 + temp, 0, 15 - temp); /* Encrypt the input block to produce the output ciphertext */ - saturnin_block_encrypt(c, block, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_encrypt_block(&ks, c, block, SATURNIN_DOMAIN_10_6); *clen = 32; return 0; } @@ -669,6 +260,7 @@ int saturnin_short_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned check1, check2, len; int index, result; @@ -682,7 +274,8 @@ int saturnin_short_aead_decrypt return -1; /* Decrypt the ciphertext block */ - saturnin_block_decrypt(block, c, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_decrypt_block(&ks, block, c, SATURNIN_DOMAIN_10_6); /* Verify that the output block starts with the nonce and that it is * padded correctly. We need to do this very carefully to avoid leaking @@ -723,7 +316,8 @@ int saturnin_hash unsigned char tag[32]; unsigned char block[32]; memset(tag, 0, sizeof(tag)); - saturnin_authenticate(tag, block, in, inlen, 16, RC_16_7, RC_16_8); + saturnin_authenticate + (tag, block, in, inlen, SATURNIN_DOMAIN_16_7, SATURNIN_DOMAIN_16_8); memcpy(out, tag, 32); return 0; } @@ -752,12 +346,14 @@ void saturnin_hash_update state->s.count = 0; in += temp; inlen -= temp; - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_7); } /* Process full blocks that are aligned at state->s.count == 0 */ while (inlen >= 32) { - saturnin_block_encrypt_xor(in, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (in, state->s.hash, SATURNIN_DOMAIN_16_7); in += 32; inlen -= 32; } @@ -776,6 +372,7 @@ void saturnin_hash_finalize memset(state->s.block + state->s.count + 1, 0, 31 - state->s.count); /* Generate the final hash value */ - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_8); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_8); memcpy(out, state->s.hash, 32); } diff --git a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin-avr.S b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin-avr.S new file mode 100644 index 0000000..f20ce72 --- /dev/null +++ b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin-avr.S @@ -0,0 +1,6365 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 384 +table_0: + .byte 194 + .byte 38 + .byte 176 + .byte 78 + .byte 3 + .byte 83 + .byte 89 + .byte 144 + .byte 50 + .byte 230 + .byte 143 + .byte 170 + .byte 146 + .byte 138 + .byte 146 + .byte 
254 + .byte 25 + .byte 164 + .byte 21 + .byte 65 + .byte 50 + .byte 149 + .byte 83 + .byte 147 + .byte 78 + .byte 204 + .byte 177 + .byte 93 + .byte 202 + .byte 21 + .byte 21 + .byte 84 + .byte 168 + .byte 85 + .byte 31 + .byte 189 + .byte 13 + .byte 26 + .byte 110 + .byte 90 + .byte 181 + .byte 38 + .byte 69 + .byte 78 + .byte 240 + .byte 95 + .byte 86 + .byte 163 + .byte 216 + .byte 32 + .byte 143 + .byte 15 + .byte 225 + .byte 190 + .byte 84 + .byte 11 + .byte 157 + .byte 108 + .byte 26 + .byte 125 + .byte 10 + .byte 40 + .byte 166 + .byte 23 + .byte 134 + .byte 201 + .byte 70 + .byte 170 + .byte 98 + .byte 144 + .byte 25 + .byte 193 + .byte 222 + .byte 92 + .byte 44 + .byte 24 + .byte 254 + .byte 83 + .byte 13 + .byte 160 + .byte 152 + .byte 38 + .byte 22 + .byte 78 + .byte 161 + .byte 91 + .byte 83 + .byte 178 + .byte 101 + .byte 157 + .byte 143 + .byte 108 + .byte 48 + .byte 173 + .byte 22 + .byte 88 + .byte 250 + .byte 212 + .byte 31 + .byte 105 + .byte 249 + .byte 188 + .byte 245 + .byte 107 + .byte 37 + .byte 53 + .byte 235 + .byte 248 + .byte 250 + .byte 236 + .byte 29 + .byte 178 + .byte 23 + .byte 164 + .byte 61 + .byte 123 + .byte 180 + .byte 148 + .byte 44 + .byte 246 + .byte 91 + .byte 38 + .byte 175 + .byte 79 + .byte 22 + .byte 70 + .byte 72 + .byte 197 + .byte 33 + .byte 173 + .byte 220 + .byte 69 + .byte 7 + .byte 214 + .byte 139 + .byte 224 + .byte 184 + .byte 253 + .byte 4 + .byte 5 + .byte 87 + .byte 82 + .byte 31 + .byte 30 + .byte 22 + .byte 194 + .byte 251 + .byte 69 + .byte 31 + .byte 155 + .byte 82 + .byte 235 + .byte 50 + .byte 78 + .byte 25 + .byte 82 + .byte 24 + .byte 192 + .byte 152 + .byte 84 + .byte 118 + .byte 38 + .byte 252 + .byte 79 + .byte 71 + .byte 66 + .byte 77 + .byte 212 + .byte 156 + .byte 16 + .byte 220 + .byte 38 + .byte 214 + .byte 197 + .byte 201 + .byte 179 + .byte 223 + .byte 69 + .byte 1 + .byte 17 + .byte 164 + .byte 198 + .byte 76 + .byte 98 + .byte 181 + .byte 62 + .byte 86 + .byte 23 + .byte 135 + .byte 231 + .byte 86 + .byte 152 + .byte 251 + .byte 182 + .byte 8 + .byte 49 + .byte 82 + .byte 7 + .byte 185 + .byte 2 + .byte 1 + .byte 38 + .byte 9 + .byte 79 + .byte 180 + .byte 78 + .byte 66 + .byte 231 + .byte 118 + .byte 214 + .byte 220 + .byte 131 + .byte 165 + .byte 241 + .byte 15 + .byte 70 + .byte 91 + .byte 141 + .byte 14 + .byte 45 + .byte 156 + .byte 123 + .byte 185 + .byte 230 + .byte 125 + .byte 59 + .byte 161 + .byte 224 + .byte 47 + .byte 98 + .byte 90 + .byte 13 + .byte 141 + .byte 191 + .byte 59 + .byte 148 + .byte 161 + .byte 78 + .byte 218 + .byte 248 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 12 + .byte 24 + .byte 186 + .byte 63 + .byte 171 + .byte 185 + .byte 58 + .byte 86 + .byte 239 + .byte 165 + .byte 94 + .byte 18 + .byte 108 + .byte 162 + .byte 157 + .byte 133 + .byte 155 + .byte 119 + .byte 207 + .byte 184 + .byte 147 + .byte 231 + .byte 77 + .byte 125 + .byte 159 + .byte 180 + .byte 239 + .byte 7 + .byte 6 + .byte 83 + .byte 82 + .byte 141 + .byte 171 + .byte 230 + .byte 8 + .byte 30 + .byte 135 + .byte 159 + .byte 114 + .byte 65 + .byte 10 + .byte 239 + .byte 74 + .byte 140 + .byte 167 + .byte 201 + .byte 160 + .byte 74 + .byte 239 + .byte 149 + .byte 58 + .byte 217 + .byte 175 + .byte 210 + .byte 0 + .byte 187 + .byte 240 + .byte 91 + .byte 44 + .byte 182 + .byte 216 + .byte 148 + .byte 109 + .byte 56 + .byte 167 + .byte 25 + .byte 155 + .byte 60 + .byte 148 + .byte 134 + .byte 9 + 
.byte 169 + .byte 218 + .byte 120 + .byte 248 + .byte 35 + .byte 211 + .byte 71 + .byte 182 + .byte 167 + .byte 120 + .byte 157 + .byte 252 + .byte 116 + .byte 17 + .byte 174 + .byte 202 + .byte 234 + .byte 119 + .byte 166 + .byte 49 + .byte 47 + .byte 84 + .byte 192 + .byte 200 + .byte 76 + .byte 5 + .byte 202 + .byte 81 + .byte 47 + .byte 149 + .byte 241 + .byte 104 + .byte 82 + .byte 43 + .byte 138 + .byte 91 + .byte 79 + .byte 172 + .byte 180 + .byte 20 + .byte 246 + .byte 1 + .byte 84 + .byte 217 + .byte 241 + .byte 104 + .byte 37 + .byte 77 + .byte 118 + .byte 17 + .byte 54 + .byte 73 + .byte 106 + .byte 62 + .byte 156 + .byte 239 + .byte 142 + + .text +.global saturnin_setup_key + .type saturnin_setup_key, @function +saturnin_setup_key: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r18,X+ + ld r19,X+ + st Z,r18 + std Z+1,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+32,r18 + std Z+33,r19 + ld r18,X+ + ld r19,X+ + std Z+4,r18 + std Z+5,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+36,r18 + std Z+37,r19 + ld r18,X+ + ld r19,X+ + std Z+8,r18 + std Z+9,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+40,r18 + std Z+41,r19 + ld r18,X+ + ld r19,X+ + std Z+12,r18 + std Z+13,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+44,r18 + std Z+45,r19 + ld r18,X+ + ld r19,X+ + std Z+16,r18 + std Z+17,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+48,r18 + std Z+49,r19 + ld r18,X+ + ld r19,X+ + std Z+20,r18 + std Z+21,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+52,r18 + std Z+53,r19 + ld r18,X+ + ld r19,X+ + std Z+24,r18 + std Z+25,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+56,r18 + std Z+57,r19 + ld r18,X+ + ld r19,X+ + std Z+28,r18 + std Z+29,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+60,r18 + std Z+61,r19 + ld r18,X+ + ld r19,X+ + std Z+2,r18 + std Z+3,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+34,r18 + std Z+35,r19 + ld r18,X+ + ld r19,X+ + std Z+6,r18 + std Z+7,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+38,r18 + std Z+39,r19 + ld r18,X+ + ld r19,X+ + std Z+10,r18 + std Z+11,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+42,r18 + std Z+43,r19 + ld r18,X+ + ld r19,X+ + std Z+14,r18 + std Z+15,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+46,r18 + std Z+47,r19 + ld r18,X+ + ld r19,X+ + std Z+18,r18 + std Z+19,r19 + mov r0,r18 + mov r18,r19 + mov 
r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+50,r18 + std Z+51,r19 + ld r18,X+ + ld r19,X+ + std Z+22,r18 + std Z+23,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+54,r18 + std Z+55,r19 + ld r18,X+ + ld r19,X+ + std Z+26,r18 + std Z+27,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+58,r18 + std Z+59,r19 + ld r18,X+ + ld r19,X+ + std Z+30,r18 + std Z+31,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+62,r18 + std Z+63,r19 + ret + .size saturnin_setup_key, .-saturnin_setup_key + + .text +.global saturnin_encrypt_block + .type saturnin_encrypt_block, @function +saturnin_encrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ld r6,X+ + ld r7,X+ + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ld r10,X+ + ld r11,X+ + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ld r14,X+ + ld r15,X+ + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ld r8,X+ + ld r9,X+ + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ld r12,X+ + ld r13,X+ + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ld r24,X+ + ld r25,X+ + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + std Y+31,r20 + std Y+32,r21 + ldi r16,5 + cpi r18,60 + cpc r19,r1 + brcs 120f + ldi r16,8 + ldi r17,4 + add r18,r17 + adc r19,r1 +120: + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1447f +126: + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 
+ mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std 
Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + eor r20,r10 + eor r21,r11 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + eor r20,r12 + eor r21,r13 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + 
std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + movw r20,r10 + movw r22,r12 + movw r10,r6 + movw r12,r8 + movw r6,r14 + movw r8,r24 + movw r14,r2 + movw r24,r4 + movw r2,r20 + movw r4,r22 + eor r2,r10 + eor r3,r11 + eor r4,r12 + eor r5,r13 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd 
r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + eor r20,r2 + eor r21,r3 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + eor r20,r4 + eor r21,r5 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd 
r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ret +1447: + rcall 126b + rcall 1453f + dec r16 + brne 1447b + rjmp 2622f +1453: + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor 
r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + movw r20,r2 + movw r22,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r20 + movw r24,r22 + eor r14,r2 + eor r15,r3 + eor r24,r4 + eor r25,r5 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd 
r23,Y+2 + eor r20,r22 + eor r21,r23 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + std Y+5,r22 + std Y+6,r23 + eor r20,r14 + eor r21,r15 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + std Y+7,r22 + std Y+8,r23 + eor r20,r24 + eor r21,r25 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + 
ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std 
Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+9,r22 + std Y+10,r23 + eor r20,r10 + eor r21,r11 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+11,r22 + std Y+12,r23 + eor r20,r12 + eor r21,r13 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor 
r13,r0 + std Y+17,r14 + std Y+18,r15 + std Y+19,r24 + std Y+20,r25 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r2 + std Y+26,r3 + std Y+27,r4 + std Y+28,r5 + std Y+29,r10 + std Y+30,r11 + std Y+31,r12 + std Y+32,r13 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 + ret +2622: + ldd r26,Y+33 + ldd r27,Y+34 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_encrypt_block, .-saturnin_encrypt_block + + .text +.global saturnin_decrypt_block + .type saturnin_decrypt_block, @function +saturnin_decrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r6,X+ + ld r7,X+ + ld r10,X+ + ld r11,X+ + ld r14,X+ + ld r15,X+ + ld r20,X+ + ld r21,X+ + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ld r8,X+ + ld r9,X+ + ld r12,X+ + ld r13,X+ + ld r24,X+ + ld r25,X+ + ld r20,X+ + ld r21,X+ + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + std Y+31,r20 + std Y+32,r21 + ldi r16,10 + cpi r18,60 + cpc r19,r1 + brcs 56f + ldi r16,16 + ldi r17,4 + add r18,r17 + adc r19,r1 +56: + add r18,r16 + adc r19,r1 + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1233f +64: + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov 
r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r14,Y+17 + ldd r15,Y+18 + ldd r24,Y+19 + ldd r25,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r2,Y+25 + ldd r3,Y+26 + ldd r4,Y+27 + ldd r5,Y+28 + ldd r10,Y+29 + ldd r11,Y+30 + ldd r12,Y+31 + ldd r13,Y+32 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor r13,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd 
r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + eor r20,r10 + eor r21,r11 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + eor r20,r12 + eor r21,r13 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or 
r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd 
r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + eor r20,r14 + eor r21,r15 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + eor r20,r24 + eor r21,r25 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + movw r20,r14 + movw r22,r24 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r20 + movw r4,r22 + eor r2,r6 + eor r3,r7 + eor r4,r8 + eor r5,r9 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + 
and r0,r25 + eor r13,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + ret +1233: + rcall 64b + rcall 1239f + subi r16,2 + brne 1233b + rjmp 2560f +1239: + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 
+ and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+5 + ldd r21,Y+6 + eor r20,r2 + eor r21,r3 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + eor r20,r4 + eor r21,r5 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+3,r22 
+ std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r2 + movw r22,r4 + movw r2,r14 + movw r4,r24 + movw r14,r6 + movw r24,r8 + movw r6,r10 + movw r8,r12 + movw r10,r20 + movw r12,r22 + eor r10,r6 + eor r11,r7 + eor r12,r8 + eor r13,r9 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + 
mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + 
eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + eor r20,r10 + eor r21,r11 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + eor r20,r12 + eor r21,r13 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + 
and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ret +2560: + ldd r26,Y+33 + ldd r27,Y+34 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_decrypt_block, .-saturnin_decrypt_block + +#endif diff --git a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.c b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.c new file mode 100644 index 0000000..f4be50d --- /dev/null +++ 
b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (C) 2020 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "internal-saturnin.h"
+
+#if !defined(__AVR__)
+
+/* Round constants for various combinations of rounds and domain_sep */
+static uint32_t const saturnin_rc[] = {
+    /* RC_10_1 */
+    0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419,
+    0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d,
+    /* RC_10_2 */
+    0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d,
+    0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe,
+    /* RC_10_3 */
+    0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa,
+    0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4,
+    /* RC_10_4 */
+    0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8,
+    0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018,
+    /* RC_10_5 */
+    0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df,
+    0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752,
+    /* RC_10_6 */
+    0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b,
+    0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1,
+    /* RC_16_7 */
+    0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b,
+    0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87,
+    0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0,
+    0x386d94d8,
+    /* RC_16_8 */
+    0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78,
+    0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195,
+    0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611,
+    0x8eef9c3e
+};
+
+/* Loads a 32-bit word from the two halves of a 256-bit Saturnin input block */
+#define saturnin_load_word32(ptr) \
+    ((((uint32_t)((ptr)[17])) << 24) | \
+     (((uint32_t)((ptr)[16])) << 16) | \
+     (((uint32_t)((ptr)[1])) << 8) | \
+      ((uint32_t)((ptr)[0])))
+
+/* Stores a 32-bit word to the two halves of a 256-bit Saturnin output block */
+#define saturnin_store_word32(ptr, x) \
+    do { \
+        (ptr)[0] = (uint8_t)(x); \
+        (ptr)[1] = (uint8_t)((x) >> 8); \
+        (ptr)[16] = (uint8_t)((x) >> 16); \
+        (ptr)[17] = (uint8_t)((x) >> 24); \
+    } while (0)
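Saturnin treats its 256-bit block as two 128-bit halves and packs one 16-bit row from each half into every 32-bit state word, which is why the macros above address bytes 0, 1, 16 and 17 instead of four consecutive bytes. A minimal standalone sketch (a hypothetical test harness, not part of the patch) that reproduces the word-0 case of saturnin_load_word32:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t block[32];
    int i;
    for (i = 0; i < 32; ++i)
        block[i] = (uint8_t)i; /* block[n] == n, for easy inspection */
    /* Word 0 packs bytes 17,16 (high half) above bytes 1,0 (low half) */
    uint32_t w0 = ((uint32_t)block[17] << 24) | ((uint32_t)block[16] << 16) |
                  ((uint32_t)block[1] << 8) | (uint32_t)block[0];
    printf("%08lx\n", (unsigned long)w0); /* prints 11100100 */
    return 0;
}

Word i of the bit-sliced state is loaded from input + 2*i, so it takes bytes 2*i and 2*i+1 of the low half and bytes 2*i+16 and 2*i+17 of the high half.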
+
+/* Rotate the 4-bit nibbles within each 16-bit half of a 32-bit word left */
+#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \
+    do { \
+        (a) = (((a) & (mask1)) << (bits1)) | \
+              (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \
+              (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \
+              (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \
+    } while (0)
+
+/* Rotate the bits within each 16-bit subword of a 32-bit word left */
+#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \
+    do { \
+        (a) = (((a) & (mask1)) << (bits1)) | \
+              (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \
+              (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \
+              (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \
+    } while (0)
+
+/**
+ * \brief XOR the key into the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state.
+ * \param x5 Sixth word of the bit-sliced state.
+ * \param x6 Seventh word of the bit-sliced state.
+ * \param x7 Eighth word of the bit-sliced state.
+ */
+#define saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7) \
+    do { \
+        x0 ^= ks->k[0]; \
+        x1 ^= ks->k[1]; \
+        x2 ^= ks->k[2]; \
+        x3 ^= ks->k[3]; \
+        x4 ^= ks->k[4]; \
+        x5 ^= ks->k[5]; \
+        x6 ^= ks->k[6]; \
+        x7 ^= ks->k[7]; \
+    } while (0)
+
+/**
+ * \brief XOR a rotated version of the key into the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state.
+ * \param x5 Sixth word of the bit-sliced state.
+ * \param x6 Seventh word of the bit-sliced state.
+ * \param x7 Eighth word of the bit-sliced state.
+ */
+#define saturnin_xor_key_rotated(x0, x1, x2, x3, x4, x5, x6, x7) \
+    do { \
+        x0 ^= ks->k[8]; \
+        x1 ^= ks->k[9]; \
+        x2 ^= ks->k[10]; \
+        x3 ^= ks->k[11]; \
+        x4 ^= ks->k[12]; \
+        x5 ^= ks->k[13]; \
+        x6 ^= ks->k[14]; \
+        x7 ^= ks->k[15]; \
+    } while (0)
+
+/**
+ * \brief Applies the Saturnin S-box to a bit-sliced set of nibbles.
+ *
+ * \param a First bit-slice.
+ * \param b Second bit-slice.
+ * \param c Third bit-slice.
+ * \param d Fourth bit-slice.
+ *
+ * The S-box also involves a rotation on the output words. We perform the
+ * rotation implicitly in the higher layers.
+ */
+#define saturnin_sbox(a, b, c, d) \
+    do { \
+        (a) ^= (b) & (c); \
+        (b) ^= (a) | (d); \
+        (d) ^= (b) | (c); \
+        (c) ^= (b) & (d); \
+        (b) ^= (a) | (c); \
+        (a) ^= (b) | (d); \
+    } while (0)
+
+/**
+ * \brief Applies the inverse of the Saturnin S-box to a set of nibbles.
+ *
+ * \param a First bit-slice.
+ * \param b Second bit-slice.
+ * \param c Third bit-slice.
+ * \param d Fourth bit-slice.
+ *
+ * The inverse of the S-box also involves a rotation on the input words.
+ * We perform the rotation implicitly in the higher layers.
+ */
+#define saturnin_sbox_inverse(a, b, c, d) \
+    do { \
+        (a) ^= (b) | (d); \
+        (b) ^= (a) | (c); \
+        (c) ^= (b) & (d); \
+        (d) ^= (b) | (c); \
+        (b) ^= (a) | (d); \
+        (a) ^= (b) & (c); \
+    } while (0)
+
+/* Helpers for MDS matrix operations */
+#define SWAP(a) (((a) << 16) | ((a) >> 16))
+#define MUL(x0, x1, x2, x3) \
+    do { \
+        temp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = temp ^ x0; \
+    } while (0)
+#define MULINV(x0, x1, x2, x3) \
+    do { \
+        temp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ temp; \
+    } while (0)
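Because each S-box step is a single XOR of an AND/OR term, the inverse S-box is just the same six steps replayed in reverse order, and the two macros undo each other exactly. A quick self-contained check (a hypothetical harness with the macro bodies lifted into functions, not part of the patch):

#include <stdint.h>
#include <stdio.h>

static void sbox(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
    /* Same steps as saturnin_sbox above */
    *a ^= *b & *c; *b ^= *a | *d; *d ^= *b | *c;
    *c ^= *b & *d; *b ^= *a | *c; *a ^= *b | *d;
}

static void sbox_inverse(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
    /* Same steps as saturnin_sbox_inverse above, i.e. sbox reversed */
    *a ^= *b | *d; *b ^= *a | *c; *c ^= *b & *d;
    *d ^= *b | *c; *b ^= *a | *d; *a ^= *b & *c;
}

int main(void)
{
    uint32_t a = 0x01234567, b = 0x89abcdef, c = 0xfedcba98, d = 0x76543210;
    uint32_t a0 = a, b0 = b, c0 = c, d0 = d;
    sbox(&a, &b, &c, &d);
    sbox_inverse(&a, &b, &c, &d);
    printf((a == a0 && b == b0 && c == c0 && d == d0)
           ? "round trip OK\n" : "mismatch\n");
    return 0;
}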
+
+/**
+ * \brief Applies the MDS matrix to the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state.
+ * \param x5 Sixth word of the bit-sliced state.
+ * \param x6 Seventh word of the bit-sliced state.
+ * \param x7 Eighth word of the bit-sliced state.
+ */
+#define saturnin_mds(x0, x1, x2, x3, x4, x5, x6, x7) \
+    do { \
+        x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \
+        MUL(x4, x5, x6, x7); \
+        x4 ^= SWAP(x0); x5 ^= SWAP(x1); \
+        x6 ^= SWAP(x2); x7 ^= SWAP(x3); \
+        MUL(x0, x1, x2, x3); \
+        MUL(x0, x1, x2, x3); \
+        x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \
+        x4 ^= SWAP(x0); x5 ^= SWAP(x1); \
+        x6 ^= SWAP(x2); x7 ^= SWAP(x3); \
+    } while (0)
+
+/**
+ * \brief Applies the inverse of the MDS matrix to the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state.
+ * \param x5 Sixth word of the bit-sliced state.
+ * \param x6 Seventh word of the bit-sliced state.
+ * \param x7 Eighth word of the bit-sliced state.
+ */
+#define saturnin_mds_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \
+    do { \
+        x6 ^= SWAP(x2); x7 ^= SWAP(x3); \
+        x4 ^= SWAP(x0); x5 ^= SWAP(x1); \
+        x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \
+        MULINV(x0, x1, x2, x3); \
+        MULINV(x0, x1, x2, x3); \
+        x6 ^= SWAP(x2); x7 ^= SWAP(x3); \
+        x4 ^= SWAP(x0); x5 ^= SWAP(x1); \
+        MULINV(x4, x5, x6, x7); \
+        x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \
+    } while (0)
+
+/**
+ * \brief Applies the slice permutation to the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state.
+ * \param x5 Sixth word of the bit-sliced state.
+ * \param x6 Seventh word of the bit-sliced state.
+ * \param x7 Eighth word of the bit-sliced state.
+ */
+#define saturnin_slice(x0, x1, x2, x3, x4, x5, x6, x7) \
+    do { \
+        leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \
+        leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \
+        leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \
+        leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \
+        leftRotate4_N(x4, 0x7777U, 1, 0x1111, 3); \
+        leftRotate4_N(x5, 0x7777U, 1, 0x1111, 3); \
+        leftRotate4_N(x6, 0x7777U, 1, 0x1111, 3); \
+        leftRotate4_N(x7, 0x7777U, 1, 0x1111, 3); \
+    } while (0)
+
+/**
+ * \brief Applies the inverse of the slice permutation to the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state.
+ * \param x5 Sixth word of the bit-sliced state.
+ * \param x6 Seventh word of the bit-sliced state.
+ * \param x7 Eighth word of the bit-sliced state.
+ */
+#define saturnin_slice_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \
+    do { \
+        leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \
+        leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \
+        leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \
+        leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \
+        leftRotate4_N(x4, 0x1111U, 3, 0x7777, 1); \
+        leftRotate4_N(x5, 0x1111U, 3, 0x7777, 1); \
+        leftRotate4_N(x6, 0x1111U, 3, 0x7777, 1); \
+        leftRotate4_N(x7, 0x1111U, 3, 0x7777, 1); \
+    } while (0)
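MUL rotates the four slice words down by one position and folds the old x0 back into the new x3; MULINV rotates the other way and removes the same fold-in, so applying one after the other is the identity. A small sketch verifying the round trip (a hypothetical harness with the macro bodies rewritten as functions, not part of the patch):

#include <stdint.h>
#include <stdio.h>

static void mul(uint32_t x[4])
{
    /* temp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = temp ^ x0; */
    uint32_t t = x[0];
    x[0] = x[1]; x[1] = x[2]; x[2] = x[3]; x[3] = t ^ x[0];
}

static void mulinv(uint32_t x[4])
{
    /* temp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ temp; */
    uint32_t t = x[3];
    x[3] = x[2]; x[2] = x[1]; x[1] = x[0]; x[0] = x[1] ^ t;
}

int main(void)
{
    uint32_t x[4] = {1, 2, 3, 4}, y[4] = {1, 2, 3, 4};
    mul(x);
    mulinv(x);
    printf((x[0] == y[0] && x[1] == y[1] && x[2] == y[2] && x[3] == y[3])
           ? "MULINV undoes MUL\n" : "mismatch\n");
    return 0;
}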
+ * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x5, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x6, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x7, 0x0FFFU, 4, 0x000F, 12); \ + } while (0) + +/** + * \brief Applies the inverse of the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x5, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x6, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x7, 0x000FU, 12, 0x0FFF, 4); \ + } while (0) + +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key) +{ + int index; + uint32_t temp; + for (index = 0; index < 16; index += 2) { + temp = saturnin_load_word32(key + index); + ks->k[index / 2] = temp; + ks->k[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | + ((temp >> 5) & 0x07FF07FFU); + } +} + +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
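+
+/* Editor's note (illustrative, not part of the upstream change): the
+ * second half of the schedule, ks->k[8..15], holds each 16-bit half of
+ * the corresponding key word rotated left by 11 bits, so that
+ * saturnin_xor_key_rotated() needs no per-round rotation work.  An
+ * equivalent per-word formulation, using hypothetical locals:
+ *
+ *     uint16_t lo = (uint16_t)temp;
+ *     uint16_t hi = (uint16_t)(temp >> 16);
+ *     lo = (uint16_t)((lo << 11) | (lo >> 5));
+ *     hi = (uint16_t)((hi << 11) | (hi >> 5));
+ *     rotated = ((uint32_t)hi << 16) | lo;    // == ks->k[8 + (index / 2)]
+ */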
+
+void saturnin_encrypt_block
+    (const saturnin_key_schedule_t *ks, unsigned char *output,
+     const unsigned char *input, unsigned domain)
+{
+    unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 8 : 5;
+    const uint32_t *rc = saturnin_rc + domain;
+    uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp;
+
+    /* Load the input into local variables */
+    x0 = saturnin_load_word32(input);
+    x1 = saturnin_load_word32(input + 2);
+    x2 = saturnin_load_word32(input + 4);
+    x3 = saturnin_load_word32(input + 6);
+    x4 = saturnin_load_word32(input + 8);
+    x5 = saturnin_load_word32(input + 10);
+    x6 = saturnin_load_word32(input + 12);
+    x7 = saturnin_load_word32(input + 14);
+
+    /* XOR the key into the state */
+    saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7);
+
+    /* Perform all encryption rounds, two at a time */
+    for (; rounds > 0; --rounds, rc += 2) {
+        /* Even rounds */
+        saturnin_sbox(x0, x1, x2, x3);
+        saturnin_sbox(x4, x5, x6, x7);
+        saturnin_mds(x1, x2, x3, x0, x7, x5, x4, x6);
+        saturnin_sbox(x1, x2, x3, x0);
+        saturnin_sbox(x7, x5, x4, x6);
+        saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4);
+        saturnin_mds(x2, x3, x0, x1, x6, x5, x7, x4);
+        saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4);
+        x2 ^= rc[0];
+        saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4);
+
+        /* Odd rounds */
+        saturnin_sbox(x2, x3, x0, x1);
+        saturnin_sbox(x6, x5, x7, x4);
+        saturnin_mds(x3, x0, x1, x2, x4, x5, x6, x7);
+        saturnin_sbox(x3, x0, x1, x2);
+        saturnin_sbox(x4, x5, x6, x7);
+        saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6);
+        saturnin_mds(x0, x1, x2, x3, x7, x5, x4, x6);
+        saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6);
+        x0 ^= rc[1];
+        saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6);
+
+        /* Correct the rotation of the second half before the next round */
+        temp = x4;
+        x4 = x7;
+        x7 = x6;
+        x6 = temp;
+    }
+
+    /* Store the local variables to the output buffer */
+    saturnin_store_word32(output, x0);
+    saturnin_store_word32(output + 2, x1);
+    saturnin_store_word32(output + 4, x2);
+    saturnin_store_word32(output + 6, x3);
+    saturnin_store_word32(output + 8, x4);
+    saturnin_store_word32(output + 10, x5);
+    saturnin_store_word32(output + 12, x6);
+    saturnin_store_word32(output + 14, x7);
+}
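+
+/* Editor's note (illustrative, not part of the upstream change): each
+ * loop iteration above performs two rounds.  The S-box's output word
+ * rotation is never applied explicitly; instead the layer macros are
+ * invoked with permuted argument orders, and the three-way swap at the
+ * bottom of the loop (x4 = old x7, x7 = old x6, x6 = old x4) restores
+ * the logical order x0..x7 before the next iteration.  Round constants
+ * are selected by pointer arithmetic: saturnin_rc appears to be one
+ * flat table in which each SATURNIN_DOMAIN_* value is the offset of
+ * that variant's constants, two being consumed per iteration, e.g.:
+ *
+ *     rc = saturnin_rc + SATURNIN_DOMAIN_10_2;  // 10 words at offset 10
+ */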
+
+void saturnin_decrypt_block
+    (const saturnin_key_schedule_t *ks, unsigned char *output,
+     const unsigned char *input, unsigned domain)
+{
+    unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 8 : 5;
+    const uint32_t *rc = saturnin_rc + domain + (rounds - 1) * 2;
+    uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp;
+
+    /* Load the input into local variables */
+    x0 = saturnin_load_word32(input);
+    x1 = saturnin_load_word32(input + 2);
+    x2 = saturnin_load_word32(input + 4);
+    x3 = saturnin_load_word32(input + 6);
+    x4 = saturnin_load_word32(input + 8);
+    x5 = saturnin_load_word32(input + 10);
+    x6 = saturnin_load_word32(input + 12);
+    x7 = saturnin_load_word32(input + 14);
+
+    /* Perform all decryption rounds, two at a time */
+    for (; rounds > 0; --rounds, rc -= 2) {
+        /* Correct the rotation of the second half before the next round */
+        temp = x6;
+        x6 = x7;
+        x7 = x4;
+        x4 = temp;
+
+        /* Odd rounds */
+        saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6);
+        x0 ^= rc[1];
+        saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6);
+        saturnin_mds_inverse(x0, x1, x2, x3, x7, x5, x4, x6);
+        saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6);
+        saturnin_sbox_inverse(x3, x0, x1, x2);
+        saturnin_sbox_inverse(x4, x5, x6, x7);
+        saturnin_mds_inverse(x3, x0, x1, x2, x4, x5, x6, x7);
+        saturnin_sbox_inverse(x2, x3, x0, x1);
+        saturnin_sbox_inverse(x6, x5, x7, x4);
+
+        /* Even rounds */
+        saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4);
+        x2 ^= rc[0];
+        saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4);
+        saturnin_mds_inverse(x2, x3, x0, x1, x6, x5, x7, x4);
+        saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4);
+        saturnin_sbox_inverse(x1, x2, x3, x0);
+        saturnin_sbox_inverse(x7, x5, x4, x6);
+        saturnin_mds_inverse(x1, x2, x3, x0, x7, x5, x4, x6);
+        saturnin_sbox_inverse(x0, x1, x2, x3);
+        saturnin_sbox_inverse(x4, x5, x6, x7);
+    }
+
+    /* XOR the key into the state */
+    saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7);
+
+    /* Store the local variables to the output buffer */
+    saturnin_store_word32(output, x0);
+    saturnin_store_word32(output + 2, x1);
+    saturnin_store_word32(output + 4, x2);
+    saturnin_store_word32(output + 6, x3);
+    saturnin_store_word32(output + 8, x4);
+    saturnin_store_word32(output + 10, x5);
+    saturnin_store_word32(output + 12, x6);
+    saturnin_store_word32(output + 14, x7);
+}
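+
+/* Editor's sketch (illustrative, not part of the upstream change): a
+ * minimal round-trip check for the functions above, kept out of the
+ * build with #if 0.  Assumes a caller-supplied 32-byte key. */
+#if 0
+#include <string.h>
+static int saturnin_round_trip_check(const unsigned char key[32])
+{
+    static const unsigned char pt[SATURNIN_BLOCK_SIZE] = {0};
+    unsigned char ct[SATURNIN_BLOCK_SIZE];
+    unsigned char out[SATURNIN_BLOCK_SIZE];
+    saturnin_key_schedule_t ks;
+    saturnin_setup_key(&ks, key);
+    saturnin_encrypt_block(&ks, ct, pt, SATURNIN_DOMAIN_10_1);
+    saturnin_decrypt_block(&ks, out, ct, SATURNIN_DOMAIN_10_1);
+    return memcmp(out, pt, SATURNIN_BLOCK_SIZE) == 0; /* expect 1 */
+}
+#endif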
+
+#endif /* !__AVR__ */
diff --git a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.h b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.h
new file mode 100644
index 0000000..8af07c3
--- /dev/null
+++ b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/internal-saturnin.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2020 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LW_INTERNAL_SATURNIN_H
+#define LW_INTERNAL_SATURNIN_H
+
+/**
+ * \file internal-saturnin.h
+ * \brief Saturnin block cipher.
+ *
+ * References: https://project.inria.fr/saturnin/
+ */
+
+#include "internal-util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \brief Size of a Saturnin block in bytes.
+ */
+#define SATURNIN_BLOCK_SIZE 32
+
+/**
+ * \brief Domain separator index 1 for the 10-round version of Saturnin.
+ */
+#define SATURNIN_DOMAIN_10_1 0
+
+/**
+ * \brief Domain separator index 2 for the 10-round version of Saturnin.
+ */
+#define SATURNIN_DOMAIN_10_2 10
+
+/**
+ * \brief Domain separator index 3 for the 10-round version of Saturnin.
+ */
+#define SATURNIN_DOMAIN_10_3 20
+
+/**
+ * \brief Domain separator index 4 for the 10-round version of Saturnin.
+ */
+#define SATURNIN_DOMAIN_10_4 30
+
+/**
+ * \brief Domain separator index 5 for the 10-round version of Saturnin.
+ */
+#define SATURNIN_DOMAIN_10_5 40
+
+/**
+ * \brief Domain separator index 6 for the 10-round version of Saturnin.
+ */
+#define SATURNIN_DOMAIN_10_6 50
+
+/**
+ * \brief Domain separator index 7 for the 16-round version of Saturnin.
+ */
+#define SATURNIN_DOMAIN_16_7 60
+
+/**
+ * \brief Domain separator index 8 for the 16-round version of Saturnin.
+ */
+#define SATURNIN_DOMAIN_16_8 76
+
+/**
+ * \brief Structure of the key schedule for Saturnin.
+ */
+typedef struct
+{
+    /** Pre-computed round keys for Saturnin */
+    uint32_t k[16];
+
+} saturnin_key_schedule_t;
+
+/**
+ * \brief Sets up a key schedule for Saturnin.
+ *
+ * \param ks Points to the key schedule to initialize.
+ * \param key Points to the 32 bytes of the key data.
+ */
+void saturnin_setup_key
+    (saturnin_key_schedule_t *ks, const unsigned char *key);
+
+/**
+ * \brief Encrypts a 256-bit block with Saturnin.
+ *
+ * \param ks Points to the Saturnin key schedule.
+ * \param output Output buffer which must be at least 32 bytes in length.
+ * \param input Input buffer which must be at least 32 bytes in length.
+ * \param domain Domain separator and round count indicator.
+ *
+ * The \a input and \a output buffers can be the same buffer for
+ * in-place encryption.
+ */
+void saturnin_encrypt_block
+    (const saturnin_key_schedule_t *ks, unsigned char *output,
+     const unsigned char *input, unsigned domain);
+
+/**
+ * \brief Decrypts a 256-bit block with Saturnin.
+ *
+ * \param ks Points to the Saturnin key schedule.
+ * \param output Output buffer which must be at least 32 bytes in length.
+ * \param input Input buffer which must be at least 32 bytes in length.
+ * \param domain Domain separator and round count indicator.
+ *
+ * The \a input and \a output buffers can be the same buffer for
+ * in-place decryption.
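+ *
+ * A minimal usage sketch (illustrative; \a key, \a pt, \a ct and \a pt2
+ * are caller-supplied buffers, and any SATURNIN_DOMAIN_* value works as
+ * long as it matches the one used for encryption):
+ *
+ * \code
+ * saturnin_key_schedule_t ks;
+ * saturnin_setup_key(&ks, key);
+ * saturnin_encrypt_block(&ks, ct, pt, SATURNIN_DOMAIN_10_1);
+ * saturnin_decrypt_block(&ks, pt2, ct, SATURNIN_DOMAIN_10_1);
+ * \endcode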
+ */ +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/saturnin.c b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/saturnin.c index 734fc69..d2bd2cc 100644 --- a/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/saturnin.c +++ b/saturnin/Implementations/crypto_aead/saturninshortv2/rhys/saturnin.c @@ -21,7 +21,7 @@ */ #include "saturnin.h" -#include "internal-util.h" +#include "internal-saturnin.h" #include aead_cipher_t const saturnin_cipher = { @@ -57,440 +57,22 @@ aead_hash_algorithm_t const saturnin_hash_algorithm = { 0 /* squeeze */ }; -/* Round constant tables for various combinations of rounds and domain_sep */ -static uint32_t const RC_10_1[] = { - 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, - 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d -}; -static uint32_t const RC_10_2[] = { - 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, - 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe -}; -static uint32_t const RC_10_3[] = { - 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, - 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4 -}; -static uint32_t const RC_10_4[] = { - 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, - 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018 -}; -static uint32_t const RC_10_5[] = { - 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, - 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752 -}; -static uint32_t const RC_10_6[] = { - 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, - 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1 -}; -static uint32_t const RC_16_7[] = { - 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, - 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, - 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, - 0x386d94d8 -}; -static uint32_t const RC_16_8[] = { - 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, - 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, - 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, - 0x8eef9c3e -}; - -/* Rotate the 4-bit nibbles within a 16-bit word left */ -#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ - } while (0) - -/* Rotate 16-bit subwords left */ -#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ - } while (0) - -/* XOR the SATURNIN state with the key */ -#define saturnin_xor_key() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index]; \ - } while (0) - -/* XOR the SATURNIN state with a rotated version of the key */ -#define saturnin_xor_key_rotated() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index + 8]; \ - } while (0) - -/* Apply an SBOX layer for SATURNIN - definition from the specification */ -#define 
S_LAYER(a, b, c, d) \ - do { \ - (a) ^= (b) & (c); \ - (b) ^= (a) | (d); \ - (d) ^= (b) | (c); \ - (c) ^= (b) & (d); \ - (b) ^= (a) | (c); \ - (a) ^= (b) | (d); \ - } while (0) - -/* Apply an SBOX layer for SATURNIN in reverse */ -#define S_LAYER_INVERSE(a, b, c, d) \ - do { \ - (a) ^= (b) | (d); \ - (b) ^= (a) | (c); \ - (c) ^= (b) & (d); \ - (d) ^= (b) | (c); \ - (b) ^= (a) | (d); \ - (a) ^= (b) & (c); \ - } while (0) - -/** - * \brief Applies the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - a = S[0]; b = S[1]; c = S[2]; d = S[3]; - S_LAYER(a, b, c, d); - S[0] = b; S[1] = c; S[2] = d; S[3] = a; - - /* PI_1 on the second half of the state */ - a = S[4]; b = S[5]; c = S[6]; d = S[7]; - S_LAYER(a, b, c, d); - S[4] = d; S[5] = b; S[6] = a; S[7] = c; -} - -/** - * \brief Applies the inverse of the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox_inverse(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - b = S[0]; c = S[1]; d = S[2]; a = S[3]; - S_LAYER_INVERSE(a, b, c, d); - S[0] = a; S[1] = b; S[2] = c; S[3] = d; - - /* PI_1 on the second half of the state */ - d = S[4]; b = S[5]; a = S[6]; c = S[7]; - S_LAYER_INVERSE(a, b, c, d); - S[4] = a; S[5] = b; S[6] = c; S[7] = d; -} - -/** - * \brief Applies the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the MDS matrix to the state */ - #define SWAP(a) (((a) << 16) | ((a) >> 16)) - #define MUL(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = tmp ^ x0; \ - } while (0) - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MUL(x4, x5, x6, x7, tmp); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - MUL(x0, x1, x2, x3, tmp); - MUL(x0, x1, x2, x3, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the inverse of the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds_inverse(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the inverse of the MDS matrix to the state */ - #define MULINV(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ tmp; \ - } while (0) - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MULINV(x0, x1, x2, x3, tmp); - MULINV(x0, x1, x2, x3, tmp); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - MULINV(x4, x5, x6, x7, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the slice permutation to the SATURNIN state. - * - * \param S The state. 
- */ -static void saturnin_slice(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[5], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[6], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[7], 0x7777U, 1, 0x1111, 3); -} - -/** - * \brief Applies the inverse of the slice permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_slice_inverse(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[5], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[6], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[7], 0x1111U, 3, 0x7777, 1); -} - -/** - * \brief Applies the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[5], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[6], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[7], 0x0FFFU, 4, 0x000F, 12); -} - -/** - * \brief Applies the inverse of the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet_inverse(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[5], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[6], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[7], 0x000FU, 12, 0x0FFF, 4); -} - -/** - * \brief Encrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Ciphertext output block, 32 bytes. - * \param input Plaintext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_decrypt() - */ -static void saturnin_block_encrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Perform all encryption rounds */ - for (; rounds > 0; rounds -= 2, RC += 2) { - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_slice(S); - saturnin_mds(S); - saturnin_slice_inverse(S); - S[0] ^= RC[0]; - saturnin_xor_key_rotated(); - - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_sheet(S); - saturnin_mds(S); - saturnin_sheet_inverse(S); - S[0] ^= RC[1]; - saturnin_xor_key(); - } - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - -/** - * \brief Decrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Plaintext output block, 32 bytes. - * \param input Ciphertext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_encrypt() - */ -static void saturnin_block_decrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* Perform all decryption rounds */ - RC += rounds - 2; - for (; rounds > 0; rounds -= 2, RC -= 2) { - saturnin_xor_key(); - S[0] ^= RC[1]; - saturnin_sheet(S); - saturnin_mds_inverse(S); - saturnin_sheet_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - - saturnin_xor_key_rotated(); - S[0] ^= RC[0]; - saturnin_slice(S); - saturnin_mds_inverse(S); - saturnin_slice_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - /** * \brief Encrypts a 256-bit block with the SATURNIN block cipher and * then XOR's itself to generate a new key. * * \param block Block to be encrypted and then XOR'ed with itself. * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. + * \param domain Domain separator and round counter. */ -void saturnin_block_encrypt_xor - (const unsigned char *block, unsigned char *key, - unsigned rounds, const uint32_t *RC) +static void saturnin_block_encrypt_xor + (const unsigned char *block, unsigned char *key, unsigned domain) { - unsigned char temp[32]; - saturnin_block_encrypt(temp, block, key, rounds, RC); - lw_xor_block_2_src(key, block, temp, 32); + saturnin_key_schedule_t ks; + unsigned char *temp = (unsigned char *)ks.k; /* Reuse some stack space */ + saturnin_setup_key(&ks, key); + saturnin_encrypt_block(&ks, temp, block, domain); + lw_xor_block_2_src(key, block, temp, SATURNIN_BLOCK_SIZE); } /** @@ -499,20 +81,20 @@ void saturnin_block_encrypt_xor * \param c Output ciphertext buffer. * \param m Input plaintext buffer. * \param mlen Length of the plaintext in bytes. - * \param k Points to the 32-byte key. + * \param ks Points to the key schedule. * \param block Points to the pre-formatted nonce block. */ static void saturnin_ctr_encrypt (unsigned char *c, const unsigned char *m, unsigned long long mlen, - const unsigned char *k, unsigned char *block) + const saturnin_key_schedule_t *ks, unsigned char *block) { /* Note: Specification requires a 95-bit counter but we only use 32-bit. * This limits the maximum packet size to 128Gb. 
That should be OK */ uint32_t counter = 1; - unsigned char out[32]; + unsigned char out[SATURNIN_BLOCK_SIZE]; while (mlen >= 32) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, 32); c += 32; m += 32; @@ -521,7 +103,7 @@ static void saturnin_ctr_encrypt } if (mlen > 0) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, (unsigned)mlen); } } @@ -533,18 +115,17 @@ static void saturnin_ctr_encrypt * \param block Temporary block of 32 bytes from the caller. * \param m Points to the message to be authenticated. * \param mlen Length of the message to be authenticated in bytes. - * \param rounds Number of rounds to perform. - * \param RC1 Round constants to use for domain separation on full blocks. - * \param RC2 Round constants to use for domain separation on the last block. + * \param domain1 Round count and domain separator for full blocks. + * \param domain2 Round count and domain separator for the last block. */ static void saturnin_authenticate (unsigned char *tag, unsigned char *block, const unsigned char *m, unsigned long long mlen, - unsigned rounds, const uint32_t *RC1, const uint32_t *RC2) + unsigned domain1, unsigned domain2) { unsigned temp; while (mlen >= 32) { - saturnin_block_encrypt_xor(m, tag, rounds, RC1); + saturnin_block_encrypt_xor(m, tag, domain1); m += 32; mlen -= 32; } @@ -552,7 +133,7 @@ static void saturnin_authenticate memcpy(block, m, temp); block[temp] = 0x80; memset(block + temp + 1, 0, 31 - temp); - saturnin_block_encrypt_xor(block, tag, rounds, RC2); + saturnin_block_encrypt_xor(block, tag, domain2); } int saturnin_aead_encrypt @@ -563,6 +144,7 @@ int saturnin_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char *tag; (void)nsec; @@ -576,17 +158,20 @@ int saturnin_aead_encrypt memset(block + 17, 0, 15); /* Encrypt the plaintext in counter mode to produce the ciphertext */ - saturnin_ctr_encrypt(c, m, mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(c, m, mlen, &ks, block); /* Set the counter back to zero and then encrypt the nonce */ tag = c + mlen; memcpy(tag, k, 32); memset(block + 17, 0, 15); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, c, mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); return 0; } @@ -598,6 +183,7 @@ int saturnin_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char tag[32]; (void)nsec; @@ -614,17 +200,20 @@ int saturnin_aead_decrypt /* Encrypt the nonce to initialize the authentication phase */ memcpy(tag, k, 32); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, 
c, *mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, *mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); /* Decrypt the ciphertext in counter mode to produce the plaintext */ memcpy(block, npub, 16); block[16] = 0x80; memset(block + 17, 0, 15); - saturnin_ctr_encrypt(m, c, *mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(m, c, *mlen, &ks, block); /* Check the authentication tag at the end of the message */ return aead_check_tag @@ -639,6 +228,7 @@ int saturnin_short_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned temp; (void)nsec; @@ -656,7 +246,8 @@ int saturnin_short_aead_encrypt memset(block + 17 + temp, 0, 15 - temp); /* Encrypt the input block to produce the output ciphertext */ - saturnin_block_encrypt(c, block, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_encrypt_block(&ks, c, block, SATURNIN_DOMAIN_10_6); *clen = 32; return 0; } @@ -669,6 +260,7 @@ int saturnin_short_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned check1, check2, len; int index, result; @@ -682,7 +274,8 @@ int saturnin_short_aead_decrypt return -1; /* Decrypt the ciphertext block */ - saturnin_block_decrypt(block, c, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_decrypt_block(&ks, block, c, SATURNIN_DOMAIN_10_6); /* Verify that the output block starts with the nonce and that it is * padded correctly. We need to do this very carefully to avoid leaking @@ -723,7 +316,8 @@ int saturnin_hash unsigned char tag[32]; unsigned char block[32]; memset(tag, 0, sizeof(tag)); - saturnin_authenticate(tag, block, in, inlen, 16, RC_16_7, RC_16_8); + saturnin_authenticate + (tag, block, in, inlen, SATURNIN_DOMAIN_16_7, SATURNIN_DOMAIN_16_8); memcpy(out, tag, 32); return 0; } @@ -752,12 +346,14 @@ void saturnin_hash_update state->s.count = 0; in += temp; inlen -= temp; - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_7); } /* Process full blocks that are aligned at state->s.count == 0 */ while (inlen >= 32) { - saturnin_block_encrypt_xor(in, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (in, state->s.hash, SATURNIN_DOMAIN_16_7); in += 32; inlen -= 32; } @@ -776,6 +372,7 @@ void saturnin_hash_finalize memset(state->s.block + state->s.count + 1, 0, 31 - state->s.count); /* Generate the final hash value */ - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_8); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_8); memcpy(out, state->s.hash, 32); } diff --git a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin-avr.S b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin-avr.S new file mode 100644 index 0000000..f20ce72 --- /dev/null +++ b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin-avr.S @@ -0,0 +1,6365 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 384 +table_0: + .byte 194 + .byte 38 + .byte 176 + .byte 78 + .byte 3 + .byte 83 + .byte 89 + .byte 144 + .byte 50 + .byte 230 + .byte 143 + .byte 170 + .byte 146 + .byte 138 + .byte 146 + .byte 254 
+ .byte 25 + .byte 164 + .byte 21 + .byte 65 + .byte 50 + .byte 149 + .byte 83 + .byte 147 + .byte 78 + .byte 204 + .byte 177 + .byte 93 + .byte 202 + .byte 21 + .byte 21 + .byte 84 + .byte 168 + .byte 85 + .byte 31 + .byte 189 + .byte 13 + .byte 26 + .byte 110 + .byte 90 + .byte 181 + .byte 38 + .byte 69 + .byte 78 + .byte 240 + .byte 95 + .byte 86 + .byte 163 + .byte 216 + .byte 32 + .byte 143 + .byte 15 + .byte 225 + .byte 190 + .byte 84 + .byte 11 + .byte 157 + .byte 108 + .byte 26 + .byte 125 + .byte 10 + .byte 40 + .byte 166 + .byte 23 + .byte 134 + .byte 201 + .byte 70 + .byte 170 + .byte 98 + .byte 144 + .byte 25 + .byte 193 + .byte 222 + .byte 92 + .byte 44 + .byte 24 + .byte 254 + .byte 83 + .byte 13 + .byte 160 + .byte 152 + .byte 38 + .byte 22 + .byte 78 + .byte 161 + .byte 91 + .byte 83 + .byte 178 + .byte 101 + .byte 157 + .byte 143 + .byte 108 + .byte 48 + .byte 173 + .byte 22 + .byte 88 + .byte 250 + .byte 212 + .byte 31 + .byte 105 + .byte 249 + .byte 188 + .byte 245 + .byte 107 + .byte 37 + .byte 53 + .byte 235 + .byte 248 + .byte 250 + .byte 236 + .byte 29 + .byte 178 + .byte 23 + .byte 164 + .byte 61 + .byte 123 + .byte 180 + .byte 148 + .byte 44 + .byte 246 + .byte 91 + .byte 38 + .byte 175 + .byte 79 + .byte 22 + .byte 70 + .byte 72 + .byte 197 + .byte 33 + .byte 173 + .byte 220 + .byte 69 + .byte 7 + .byte 214 + .byte 139 + .byte 224 + .byte 184 + .byte 253 + .byte 4 + .byte 5 + .byte 87 + .byte 82 + .byte 31 + .byte 30 + .byte 22 + .byte 194 + .byte 251 + .byte 69 + .byte 31 + .byte 155 + .byte 82 + .byte 235 + .byte 50 + .byte 78 + .byte 25 + .byte 82 + .byte 24 + .byte 192 + .byte 152 + .byte 84 + .byte 118 + .byte 38 + .byte 252 + .byte 79 + .byte 71 + .byte 66 + .byte 77 + .byte 212 + .byte 156 + .byte 16 + .byte 220 + .byte 38 + .byte 214 + .byte 197 + .byte 201 + .byte 179 + .byte 223 + .byte 69 + .byte 1 + .byte 17 + .byte 164 + .byte 198 + .byte 76 + .byte 98 + .byte 181 + .byte 62 + .byte 86 + .byte 23 + .byte 135 + .byte 231 + .byte 86 + .byte 152 + .byte 251 + .byte 182 + .byte 8 + .byte 49 + .byte 82 + .byte 7 + .byte 185 + .byte 2 + .byte 1 + .byte 38 + .byte 9 + .byte 79 + .byte 180 + .byte 78 + .byte 66 + .byte 231 + .byte 118 + .byte 214 + .byte 220 + .byte 131 + .byte 165 + .byte 241 + .byte 15 + .byte 70 + .byte 91 + .byte 141 + .byte 14 + .byte 45 + .byte 156 + .byte 123 + .byte 185 + .byte 230 + .byte 125 + .byte 59 + .byte 161 + .byte 224 + .byte 47 + .byte 98 + .byte 90 + .byte 13 + .byte 141 + .byte 191 + .byte 59 + .byte 148 + .byte 161 + .byte 78 + .byte 218 + .byte 248 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 12 + .byte 24 + .byte 186 + .byte 63 + .byte 171 + .byte 185 + .byte 58 + .byte 86 + .byte 239 + .byte 165 + .byte 94 + .byte 18 + .byte 108 + .byte 162 + .byte 157 + .byte 133 + .byte 155 + .byte 119 + .byte 207 + .byte 184 + .byte 147 + .byte 231 + .byte 77 + .byte 125 + .byte 159 + .byte 180 + .byte 239 + .byte 7 + .byte 6 + .byte 83 + .byte 82 + .byte 141 + .byte 171 + .byte 230 + .byte 8 + .byte 30 + .byte 135 + .byte 159 + .byte 114 + .byte 65 + .byte 10 + .byte 239 + .byte 74 + .byte 140 + .byte 167 + .byte 201 + .byte 160 + .byte 74 + .byte 239 + .byte 149 + .byte 58 + .byte 217 + .byte 175 + .byte 210 + .byte 0 + .byte 187 + .byte 240 + .byte 91 + .byte 44 + .byte 182 + .byte 216 + .byte 148 + .byte 109 + .byte 56 + .byte 167 + .byte 25 + .byte 155 + .byte 60 + .byte 148 + .byte 134 + .byte 9 + .byte 
169 + .byte 218 + .byte 120 + .byte 248 + .byte 35 + .byte 211 + .byte 71 + .byte 182 + .byte 167 + .byte 120 + .byte 157 + .byte 252 + .byte 116 + .byte 17 + .byte 174 + .byte 202 + .byte 234 + .byte 119 + .byte 166 + .byte 49 + .byte 47 + .byte 84 + .byte 192 + .byte 200 + .byte 76 + .byte 5 + .byte 202 + .byte 81 + .byte 47 + .byte 149 + .byte 241 + .byte 104 + .byte 82 + .byte 43 + .byte 138 + .byte 91 + .byte 79 + .byte 172 + .byte 180 + .byte 20 + .byte 246 + .byte 1 + .byte 84 + .byte 217 + .byte 241 + .byte 104 + .byte 37 + .byte 77 + .byte 118 + .byte 17 + .byte 54 + .byte 73 + .byte 106 + .byte 62 + .byte 156 + .byte 239 + .byte 142 + + .text +.global saturnin_setup_key + .type saturnin_setup_key, @function +saturnin_setup_key: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r18,X+ + ld r19,X+ + st Z,r18 + std Z+1,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+32,r18 + std Z+33,r19 + ld r18,X+ + ld r19,X+ + std Z+4,r18 + std Z+5,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+36,r18 + std Z+37,r19 + ld r18,X+ + ld r19,X+ + std Z+8,r18 + std Z+9,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+40,r18 + std Z+41,r19 + ld r18,X+ + ld r19,X+ + std Z+12,r18 + std Z+13,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+44,r18 + std Z+45,r19 + ld r18,X+ + ld r19,X+ + std Z+16,r18 + std Z+17,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+48,r18 + std Z+49,r19 + ld r18,X+ + ld r19,X+ + std Z+20,r18 + std Z+21,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+52,r18 + std Z+53,r19 + ld r18,X+ + ld r19,X+ + std Z+24,r18 + std Z+25,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+56,r18 + std Z+57,r19 + ld r18,X+ + ld r19,X+ + std Z+28,r18 + std Z+29,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+60,r18 + std Z+61,r19 + ld r18,X+ + ld r19,X+ + std Z+2,r18 + std Z+3,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+34,r18 + std Z+35,r19 + ld r18,X+ + ld r19,X+ + std Z+6,r18 + std Z+7,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+38,r18 + std Z+39,r19 + ld r18,X+ + ld r19,X+ + std Z+10,r18 + std Z+11,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+42,r18 + std Z+43,r19 + ld r18,X+ + ld r19,X+ + std Z+14,r18 + std Z+15,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+46,r18 + std Z+47,r19 + ld r18,X+ + ld r19,X+ + std Z+18,r18 + std Z+19,r19 + mov r0,r18 + mov r18,r19 + mov 
r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+50,r18 + std Z+51,r19 + ld r18,X+ + ld r19,X+ + std Z+22,r18 + std Z+23,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+54,r18 + std Z+55,r19 + ld r18,X+ + ld r19,X+ + std Z+26,r18 + std Z+27,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+58,r18 + std Z+59,r19 + ld r18,X+ + ld r19,X+ + std Z+30,r18 + std Z+31,r19 + mov r0,r18 + mov r18,r19 + mov r19,r0 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + lsl r18 + rol r19 + adc r18,r1 + std Z+62,r18 + std Z+63,r19 + ret + .size saturnin_setup_key, .-saturnin_setup_key + + .text +.global saturnin_encrypt_block + .type saturnin_encrypt_block, @function +saturnin_encrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ld r6,X+ + ld r7,X+ + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ld r10,X+ + ld r11,X+ + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ld r14,X+ + ld r15,X+ + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ld r8,X+ + ld r9,X+ + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ld r12,X+ + ld r13,X+ + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ld r24,X+ + ld r25,X+ + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + ld r20,X+ + ld r21,X+ + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + std Y+31,r20 + std Y+32,r21 + ldi r16,5 + cpi r18,60 + cpc r19,r1 + brcs 120f + ldi r16,8 + ldi r17,4 + add r18,r17 + adc r19,r1 +120: + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1447f +126: + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 
+ mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std 
Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + eor r20,r10 + eor r21,r11 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + eor r20,r12 + eor r21,r13 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + 
std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + movw r20,r10 + movw r22,r12 + movw r10,r6 + movw r12,r8 + movw r6,r14 + movw r8,r24 + movw r14,r2 + movw r24,r4 + movw r2,r20 + movw r4,r22 + eor r2,r10 + eor r3,r11 + eor r4,r12 + eor r5,r13 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd 
r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + eor r20,r2 + eor r21,r3 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + eor r20,r4 + eor r21,r5 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd 
r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ret +1447: + rcall 126b + rcall 1453f + dec r16 + brne 1447b + rjmp 2622f +1453: + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor 
r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + movw r20,r2 + movw r22,r4 + movw r2,r6 + movw r4,r8 + movw r6,r10 + movw r8,r12 + movw r10,r14 + movw r12,r24 + movw r14,r20 + movw r24,r22 + eor r14,r2 + eor r15,r3 + eor r24,r4 + eor r25,r5 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+1 + ldd 
r23,Y+2 + eor r20,r22 + eor r21,r23 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + std Y+5,r22 + std Y+6,r23 + eor r20,r14 + eor r21,r15 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + std Y+7,r22 + std Y+8,r23 + eor r20,r24 + eor r21,r25 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + 
ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + movw r20,r14 + movw r22,r24 + movw r14,r6 + movw r24,r8 + movw r6,r2 + movw r8,r4 + movw r2,r10 + movw r4,r12 + movw r10,r20 + movw r12,r22 + eor r10,r14 + eor r11,r15 + eor r12,r24 + eor r13,r25 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+9,r22 + std Y+10,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+11,r22 + std Y+12,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + eor r22,r14 + eor r23,r15 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std 
Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+9,r22 + std Y+10,r23 + eor r20,r10 + eor r21,r11 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + eor r22,r24 + eor r23,r25 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+11,r22 + std Y+12,r23 + eor r20,r12 + eor r21,r13 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + inc r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + inc r18 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor 
r13,r0 + std Y+17,r14 + std Y+18,r15 + std Y+19,r24 + std Y+20,r25 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r2 + std Y+26,r3 + std Y+27,r4 + std Y+28,r5 + std Y+29,r10 + std Y+30,r11 + std Y+31,r12 + std Y+32,r13 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 + ret +2622: + ldd r26,Y+33 + ldd r27,Y+34 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_encrypt_block, .-saturnin_encrypt_block + + .text +.global saturnin_decrypt_block + .type saturnin_decrypt_block, @function +saturnin_decrypt_block: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 52 + ld r2,X+ + ld r3,X+ + ld r6,X+ + ld r7,X+ + ld r10,X+ + ld r11,X+ + ld r14,X+ + ld r15,X+ + ld r20,X+ + ld r21,X+ + std Y+17,r20 + std Y+18,r21 + ld r20,X+ + ld r21,X+ + std Y+21,r20 + std Y+22,r21 + ld r20,X+ + ld r21,X+ + std Y+25,r20 + std Y+26,r21 + ld r20,X+ + ld r21,X+ + std Y+29,r20 + std Y+30,r21 + ld r4,X+ + ld r5,X+ + ld r8,X+ + ld r9,X+ + ld r12,X+ + ld r13,X+ + ld r24,X+ + ld r25,X+ + ld r20,X+ + ld r21,X+ + std Y+19,r20 + std Y+20,r21 + ld r20,X+ + ld r21,X+ + std Y+23,r20 + std Y+24,r21 + ld r20,X+ + ld r21,X+ + std Y+27,r20 + std Y+28,r21 + ld r20,X+ + ld r21,X+ + std Y+31,r20 + std Y+32,r21 + ldi r16,10 + cpi r18,60 + cpc r19,r1 + brcs 56f + ldi r16,16 + ldi r17,4 + add r18,r17 + adc r19,r1 +56: + add r18,r16 + adc r19,r1 + lsl r18 + rol r19 + lsl r18 + rol r19 + rjmp 1233f +64: + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov 
r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r2,r20 + eor r3,r21 + eor r4,r22 + eor r5,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r14,Y+17 + ldd r15,Y+18 + ldd r24,Y+19 + ldd r25,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r2,Y+25 + ldd r3,Y+26 + ldd r4,Y+27 + ldd r5,Y+28 + ldd r10,Y+29 + ldd r11,Y+30 + ldd r12,Y+31 + ldd r13,Y+32 + ldd r0,Z+16 + eor r14,r0 + ldd r0,Z+17 + eor r15,r0 + ldd r0,Z+18 + eor r24,r0 + ldd r0,Z+19 + eor r25,r0 + ldd r0,Z+20 + eor r6,r0 + ldd r0,Z+21 + eor r7,r0 + ldd r0,Z+22 + eor r8,r0 + ldd r0,Z+23 + eor r9,r0 + ldd r0,Z+24 + eor r2,r0 + ldd r0,Z+25 + eor r3,r0 + ldd r0,Z+26 + eor r4,r0 + ldd r0,Z+27 + eor r5,r0 + ldd r0,Z+28 + eor r10,r0 + ldd r0,Z+29 + eor r11,r0 + ldd r0,Z+30 + eor r12,r0 + ldd r0,Z+31 + eor r13,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + mov r0,r25 + mov r25,r24 + mov r24,r0 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + mov r0,r9 + mov r9,r8 + mov r8,r0 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + mov r0,r5 + mov r5,r4 + mov r4,r0 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + mov r0,r13 + mov r13,r12 + mov r12,r0 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd 
r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + eor r20,r10 + eor r21,r11 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r2 + eor r23,r3 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r6 + eor r23,r7 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + eor r20,r12 + eor r21,r13 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r4 + eor r23,r5 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r8 + eor r23,r9 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r20,r22 + eor r21,r23 + std Y+5,r22 + std Y+6,r23 + std Y+1,r20 + std Y+2,r21 + ldd r20,Y+15 + ldd r21,Y+16 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r20,r22 + eor r21,r23 + std Y+7,r22 + std Y+8,r23 + std Y+3,r20 + std Y+4,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r6 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r7 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r8 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r9 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r2 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r3 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r4 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r5 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r10 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r11 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r12 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r13 + std Y+16,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r5 + mov r5,r4 + mov r4,r0 + mov r0,r9 + mov r9,r8 + mov r8,r0 + mov r0,r13 + mov r13,r12 + mov r12,r0 + mov r0,r25 + mov r25,r24 + mov r24,r0 + mov r0,r2 + or r0,r10 + eor r14,r0 + mov r0,r3 + or r0,r11 + eor r15,r0 + mov r0,r4 + or 
r0,r12 + eor r24,r0 + mov r0,r5 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r6 + eor r2,r0 + mov r0,r15 + or r0,r7 + eor r3,r0 + mov r0,r24 + or r0,r8 + eor r4,r0 + mov r0,r25 + or r0,r9 + eor r5,r0 + mov r0,r2 + and r0,r10 + eor r6,r0 + mov r0,r3 + and r0,r11 + eor r7,r0 + mov r0,r4 + and r0,r12 + eor r8,r0 + mov r0,r5 + and r0,r13 + eor r9,r0 + mov r0,r2 + or r0,r6 + eor r10,r0 + mov r0,r3 + or r0,r7 + eor r11,r0 + mov r0,r4 + or r0,r8 + eor r12,r0 + mov r0,r5 + or r0,r9 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r2,r0 + mov r0,r15 + or r0,r11 + eor r3,r0 + mov r0,r24 + or r0,r12 + eor r4,r0 + mov r0,r25 + or r0,r13 + eor r5,r0 + mov r0,r2 + and r0,r6 + eor r14,r0 + mov r0,r3 + and r0,r7 + eor r15,r0 + mov r0,r4 + and r0,r8 + eor r24,r0 + mov r0,r5 + and r0,r9 + eor r25,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + mov r0,r15 + mov r15,r14 + mov r14,r0 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r14 + rol r15 + adc r14,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + lsl r24 + rol r25 + adc r24,r1 + mov r0,r7 + mov r7,r6 + mov r6,r0 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r6 + rol r7 + adc r6,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + lsl r8 + rol r9 + adc r8,r1 + mov r0,r3 + mov r3,r2 + mov r2,r0 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r2 + rol r3 + adc r2,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + lsl r4 + rol r5 + adc r4,r1 + mov r0,r11 + mov r11,r10 + mov r10,r0 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r10 + rol r11 + adc r10,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + lsl r12 + rol r13 + adc r12,r1 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd 
r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + eor r20,r14 + eor r21,r15 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r10 + eor r23,r11 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r6 + eor r23,r7 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + eor r20,r24 + eor r21,r25 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r12 + eor r23,r13 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r8 + eor r23,r9 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+5 + ldd r23,Y+6 + std Y+9,r22 + std Y+10,r23 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r20,r22 + eor r21,r23 + std Y+1,r22 + std Y+2,r23 + std Y+13,r20 + std Y+14,r21 + ldd r20,Y+11 + ldd r21,Y+12 + ldd r22,Y+7 + ldd r23,Y+8 + std Y+11,r22 + std Y+12,r23 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r20,r22 + eor r21,r23 + std Y+3,r22 + std Y+4,r23 + std Y+15,r20 + std Y+16,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + movw r20,r14 + movw r22,r24 + movw r14,r10 + movw r24,r12 + movw r10,r6 + movw r12,r8 + movw r6,r2 + movw r8,r4 + movw r2,r20 + movw r4,r22 + eor r2,r6 + eor r3,r7 + eor r4,r8 + eor r5,r9 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r6 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r7 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r8 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r9 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r10 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r11 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r12 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r13 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r14 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r15 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r24 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r25 + std Y+12,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + or r0,r14 + eor r6,r0 + mov r0,r11 + or r0,r15 + eor r7,r0 + mov r0,r12 + or r0,r24 + eor r8,r0 + mov r0,r13 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + 
and r0,r25 + eor r13,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r14 + or r0,r6 + eor r10,r0 + mov r0,r15 + or r0,r7 + eor r11,r0 + mov r0,r24 + or r0,r8 + eor r12,r0 + mov r0,r25 + or r0,r9 + eor r13,r0 + mov r0,r10 + or r0,r2 + eor r14,r0 + mov r0,r11 + or r0,r3 + eor r15,r0 + mov r0,r12 + or r0,r4 + eor r24,r0 + mov r0,r13 + or r0,r5 + eor r25,r0 + mov r0,r14 + and r0,r6 + eor r2,r0 + mov r0,r15 + and r0,r7 + eor r3,r0 + mov r0,r24 + and r0,r8 + eor r4,r0 + mov r0,r25 + and r0,r9 + eor r5,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r10 + or r0,r6 + eor r14,r0 + mov r0,r11 + or r0,r7 + eor r15,r0 + mov r0,r12 + or r0,r8 + eor r24,r0 + mov r0,r13 + or r0,r9 + eor r25,r0 + mov r0,r14 + and r0,r2 + eor r10,r0 + mov r0,r15 + and r0,r3 + eor r11,r0 + mov r0,r24 + and r0,r4 + eor r12,r0 + mov r0,r25 + and r0,r5 + eor r13,r0 + ret +1233: + rcall 64b + rcall 1239f + subi r16,2 + brne 1233b + rjmp 2560f +1239: + ldd r0,Z+32 + eor r10,r0 + ldd r0,Z+33 + eor r11,r0 + ldd r0,Z+34 + eor r12,r0 + ldd r0,Z+35 + eor r13,r0 + ldd r0,Z+36 + eor r14,r0 + ldd r0,Z+37 + eor r15,r0 + ldd r0,Z+38 + eor r24,r0 + ldd r0,Z+39 + eor r25,r0 + ldd r0,Z+40 + eor r2,r0 + ldd r0,Z+41 + eor r3,r0 + ldd r0,Z+42 + eor r4,r0 + ldd r0,Z+43 + eor r5,r0 + ldd r0,Z+44 + eor r6,r0 + ldd r0,Z+45 + eor r7,r0 + ldd r0,Z+46 + eor r8,r0 + ldd r0,Z+47 + eor r9,r0 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r17,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r17 +#endif + add r31,r19 + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r23,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r23,Z +#elif defined(__AVR_TINY__) + ld r23,Z +#else + lpm + mov r23,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r22,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r22,Z +#elif defined(__AVR_TINY__) + ld r22,Z +#else + lpm + mov r22,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r21,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r21,Z +#elif defined(__AVR_TINY__) + ld r21,Z +#else + lpm + mov r21,r0 +#endif + dec r18 + mov r30,r18 +#if defined(RAMPZ) + elpm r20,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r20,Z +#elif defined(__AVR_TINY__) + ld r20,Z +#else + lpm + mov r20,r0 +#endif + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 
+ and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + ldd r0,Z+48 + eor r10,r0 + ldd r0,Z+49 + eor r11,r0 + ldd r0,Z+50 + eor r12,r0 + ldd r0,Z+51 + eor r13,r0 + ldd r0,Z+52 + eor r6,r0 + ldd r0,Z+53 + eor r7,r0 + ldd r0,Z+54 + eor r8,r0 + ldd r0,Z+55 + eor r9,r0 + ldd r0,Z+56 + eor r14,r0 + ldd r0,Z+57 + eor r15,r0 + ldd r0,Z+58 + eor r24,r0 + ldd r0,Z+59 + eor r25,r0 + ldd r0,Z+60 + eor r2,r0 + ldd r0,Z+61 + eor r3,r0 + ldd r0,Z+62 + eor r4,r0 + ldd r0,Z+63 + eor r5,r0 + movw r20,r10 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + movw r20,r12 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r6 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r8 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r14 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r24 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r2 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r4 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+5 + ldd r21,Y+6 + eor r20,r2 + eor r21,r3 + ldd r22,Y+1 + ldd r23,Y+2 + eor r22,r14 + eor r23,r15 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r6 + eor r23,r7 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r10 + eor r23,r11 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + eor r20,r4 + eor r21,r5 + ldd r22,Y+3 + ldd r23,Y+4 + eor r22,r24 + eor r23,r25 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r8 + eor r23,r9 + std Y+3,r22 
+ std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r12 + eor r23,r13 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+1 + ldd r23,Y+2 + std Y+5,r22 + std Y+6,r23 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r20,r22 + eor r21,r23 + std Y+13,r22 + std Y+14,r23 + std Y+9,r20 + std Y+10,r21 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+3 + ldd r23,Y+4 + std Y+7,r22 + std Y+8,r23 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r20,r22 + eor r21,r23 + std Y+15,r22 + std Y+16,r23 + std Y+11,r20 + std Y+12,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + movw r20,r2 + movw r22,r4 + movw r2,r14 + movw r4,r24 + movw r14,r6 + movw r24,r8 + movw r6,r10 + movw r8,r12 + movw r10,r20 + movw r12,r22 + eor r10,r6 + eor r11,r7 + eor r12,r8 + eor r13,r9 + ldd r0,Y+9 + eor r0,r10 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r11 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r12 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r13 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r6 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r7 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r8 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r9 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r14 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r15 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r24 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r25 + std Y+4,r0 + ldd r0,Y+5 + eor r0,r2 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r3 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r4 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r5 + std Y+8,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,51 + and r20,r22 + and r21,r22 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + mov r0,r10 + or r0,r2 + eor r6,r0 + mov r0,r11 + or r0,r3 + eor r7,r0 + mov r0,r12 + or r0,r4 + eor r8,r0 + mov r0,r13 + or r0,r5 + eor r9,r0 + mov r0,r6 + or r0,r14 + eor r10,r0 + mov r0,r7 + or r0,r15 + eor r11,r0 + mov r0,r8 + or r0,r24 + eor r12,r0 + mov r0,r9 + or r0,r25 + eor r13,r0 + mov r0,r10 + and r0,r2 + eor r14,r0 + mov r0,r11 + and r0,r3 + eor r15,r0 + mov r0,r12 + and r0,r4 + eor r24,r0 + 
mov r0,r13 + and r0,r5 + eor r25,r0 + mov r0,r10 + or r0,r14 + eor r2,r0 + mov r0,r11 + or r0,r15 + eor r3,r0 + mov r0,r12 + or r0,r24 + eor r4,r0 + mov r0,r13 + or r0,r25 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r10 + and r0,r14 + eor r6,r0 + mov r0,r11 + and r0,r15 + eor r7,r0 + mov r0,r12 + and r0,r24 + eor r8,r0 + mov r0,r13 + and r0,r25 + eor r9,r0 + std Y+1,r2 + std Y+2,r3 + std Y+3,r4 + std Y+4,r5 + std Y+5,r6 + std Y+6,r7 + std Y+7,r8 + std Y+8,r9 + std Y+9,r10 + std Y+10,r11 + std Y+11,r12 + std Y+12,r13 + std Y+13,r14 + std Y+14,r15 + std Y+15,r24 + std Y+16,r25 + ldd r2,Y+17 + ldd r3,Y+18 + ldd r4,Y+19 + ldd r5,Y+20 + ldd r6,Y+21 + ldd r7,Y+22 + ldd r8,Y+23 + ldd r9,Y+24 + ldd r10,Y+25 + ldd r11,Y+26 + ldd r12,Y+27 + ldd r13,Y+28 + ldd r14,Y+29 + ldd r15,Y+30 + ldd r24,Y+31 + ldd r25,Y+32 + movw r20,r24 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r24,r22 + and r25,r22 + lsl r24 + rol r25 + or r24,r20 + or r25,r21 + movw r20,r14 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r14,r22 + and r15,r22 + lsl r14 + rol r15 + lsl r14 + rol r15 + lsl r14 + rol r15 + or r14,r20 + or r15,r21 + movw r20,r8 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r8,r22 + and r9,r22 + lsl r8 + rol r9 + or r8,r20 + or r9,r21 + movw r20,r6 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r6,r22 + and r7,r22 + lsl r6 + rol r7 + lsl r6 + rol r7 + lsl r6 + rol r7 + or r6,r20 + or r7,r21 + movw r20,r4 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r4,r22 + and r5,r22 + lsl r4 + rol r5 + or r4,r20 + or r5,r21 + movw r20,r2 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r2,r22 + and r3,r22 + lsl r2 + rol r3 + lsl r2 + rol r3 + lsl r2 + rol r3 + or r2,r20 + or r3,r21 + movw r20,r12 + lsr r21 + ror r20 + lsr r21 + ror r20 + lsr r21 + ror r20 + ldi r22,17 + and r20,r22 + and r21,r22 + ldi r22,119 + and r12,r22 + and r13,r22 + lsl r12 + rol r13 + or r12,r20 + or r13,r21 + movw r20,r10 + lsr r21 + ror r20 + and r20,r22 + and r21,r22 + ldi r22,17 + and r10,r22 + and r11,r22 + lsl r10 + rol r11 + lsl r10 + rol r11 + lsl r10 + rol r11 + or r10,r20 + or r11,r21 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r14 + or r0,r2 + eor r6,r0 + mov r0,r15 + or r0,r3 + eor r7,r0 + mov r0,r24 + or r0,r4 + eor r8,r0 + mov r0,r25 + or r0,r5 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + mov r0,r6 + or r0,r2 + eor r10,r0 + mov r0,r7 + or r0,r3 + eor r11,r0 + mov r0,r8 + or r0,r4 + eor r12,r0 + mov r0,r9 + or r0,r5 + eor r13,r0 + mov r0,r14 + or r0,r10 + eor r6,r0 + mov r0,r15 + or r0,r11 + eor r7,r0 + mov r0,r24 + or r0,r12 + eor r8,r0 + mov r0,r25 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r2 + eor r14,r0 + mov r0,r7 + and r0,r3 + eor r15,r0 + mov r0,r8 + and r0,r4 + eor r24,r0 + mov r0,r9 + and r0,r5 + eor r25,r0 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + 
eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + ldd r20,Y+1 + ldd r21,Y+2 + eor r20,r10 + eor r21,r11 + ldd r22,Y+13 + ldd r23,Y+14 + eor r22,r2 + eor r23,r3 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + eor r22,r6 + eor r23,r7 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r22,r14 + eor r23,r15 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + eor r20,r12 + eor r21,r13 + ldd r22,Y+15 + ldd r23,Y+16 + eor r22,r4 + eor r23,r5 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + eor r22,r8 + eor r23,r9 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r22,r24 + eor r23,r25 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+13 + ldd r23,Y+14 + std Y+1,r22 + std Y+2,r23 + ldd r22,Y+9 + ldd r23,Y+10 + std Y+13,r22 + std Y+14,r23 + ldd r22,Y+5 + ldd r23,Y+6 + eor r20,r22 + eor r21,r23 + std Y+9,r22 + std Y+10,r23 + std Y+5,r20 + std Y+6,r21 + ldd r20,Y+3 + ldd r21,Y+4 + ldd r22,Y+15 + ldd r23,Y+16 + std Y+3,r22 + std Y+4,r23 + ldd r22,Y+11 + ldd r23,Y+12 + std Y+15,r22 + std Y+16,r23 + ldd r22,Y+7 + ldd r23,Y+8 + eor r20,r22 + eor r21,r23 + std Y+11,r22 + std Y+12,r23 + std Y+7,r20 + std Y+8,r21 + ldd r20,Y+5 + ldd r21,Y+6 + ldd r22,Y+7 + ldd r23,Y+8 + eor r14,r22 + eor r15,r23 + eor r24,r20 + eor r25,r21 + ldd r20,Y+9 + ldd r21,Y+10 + ldd r22,Y+11 + ldd r23,Y+12 + eor r6,r22 + eor r7,r23 + eor r8,r20 + eor r9,r21 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + ldd r23,Y+16 + eor r2,r22 + eor r3,r23 + eor r4,r20 + eor r5,r21 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + eor r10,r22 + eor r11,r23 + eor r12,r20 + eor r13,r21 + movw r20,r10 + movw r22,r12 + movw r10,r2 + movw r12,r4 + movw r2,r6 + movw r4,r8 + movw r6,r14 + movw r8,r24 + movw r14,r20 + movw r24,r22 + eor r14,r6 + eor r15,r7 + eor r24,r8 + eor r25,r9 + ldd r0,Y+5 + eor r0,r14 + std Y+5,r0 + ldd r0,Y+6 + eor r0,r15 + std Y+6,r0 + ldd r0,Y+7 + eor r0,r24 + std Y+7,r0 + ldd r0,Y+8 + eor r0,r25 + std Y+8,r0 + ldd r0,Y+9 + eor r0,r6 + std Y+9,r0 + ldd r0,Y+10 + eor r0,r7 + std Y+10,r0 + ldd r0,Y+11 + eor r0,r8 + std Y+11,r0 + ldd r0,Y+12 + eor r0,r9 + std Y+12,r0 + ldd r0,Y+13 + eor r0,r2 + std Y+13,r0 + ldd r0,Y+14 + eor r0,r3 + std Y+14,r0 + ldd r0,Y+15 + eor r0,r4 + std Y+15,r0 + ldd r0,Y+16 + eor r0,r5 + std Y+16,r0 + ldd r0,Y+1 + eor r0,r10 + std Y+1,r0 + ldd r0,Y+2 + eor r0,r11 + std Y+2,r0 + ldd r0,Y+3 + eor r0,r12 + std Y+3,r0 + ldd r0,Y+4 + eor r0,r13 + std Y+4,r0 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + 
and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + std Y+17,r2 + std Y+18,r3 + std Y+19,r4 + std Y+20,r5 + std Y+21,r6 + std Y+22,r7 + std Y+23,r8 + std Y+24,r9 + std Y+25,r10 + std Y+26,r11 + std Y+27,r12 + std Y+28,r13 + std Y+29,r14 + std Y+30,r15 + std Y+31,r24 + std Y+32,r25 + ldd r2,Y+1 + ldd r3,Y+2 + ldd r4,Y+3 + ldd r5,Y+4 + ldd r6,Y+5 + ldd r7,Y+6 + ldd r8,Y+7 + ldd r9,Y+8 + ldd r10,Y+9 + ldd r11,Y+10 + ldd r12,Y+11 + ldd r13,Y+12 + ldd r14,Y+13 + ldd r15,Y+14 + ldd r24,Y+15 + ldd r25,Y+16 + mov r0,r6 + or r0,r14 + eor r2,r0 + mov r0,r7 + or r0,r15 + eor r3,r0 + mov r0,r8 + or r0,r24 + eor r4,r0 + mov r0,r9 + or r0,r25 + eor r5,r0 + mov r0,r2 + or r0,r10 + eor r6,r0 + mov r0,r3 + or r0,r11 + eor r7,r0 + mov r0,r4 + or r0,r12 + eor r8,r0 + mov r0,r5 + or r0,r13 + eor r9,r0 + mov r0,r6 + and r0,r14 + eor r10,r0 + mov r0,r7 + and r0,r15 + eor r11,r0 + mov r0,r8 + and r0,r24 + eor r12,r0 + mov r0,r9 + and r0,r25 + eor r13,r0 + mov r0,r6 + or r0,r10 + eor r14,r0 + mov r0,r7 + or r0,r11 + eor r15,r0 + mov r0,r8 + or r0,r12 + eor r24,r0 + mov r0,r9 + or r0,r13 + eor r25,r0 + mov r0,r2 + or r0,r14 + eor r6,r0 + mov r0,r3 + or r0,r15 + eor r7,r0 + mov r0,r4 + or r0,r24 + eor r8,r0 + mov r0,r5 + or r0,r25 + eor r9,r0 + mov r0,r6 + and r0,r10 + eor r2,r0 + mov r0,r7 + and r0,r11 + eor r3,r0 + mov r0,r8 + and r0,r12 + eor r4,r0 + mov r0,r9 + and r0,r13 + eor r5,r0 + ret +2560: + ldd r26,Y+33 + ldd r27,Y+34 + ld r0,Z + eor r2,r0 + ldd r0,Z+1 + eor r3,r0 + ldd r0,Z+2 + eor r4,r0 + ldd r0,Z+3 + eor r5,r0 + ldd r0,Z+4 + eor r6,r0 + ldd r0,Z+5 + eor r7,r0 + ldd r0,Z+6 + eor r8,r0 + ldd r0,Z+7 + eor r9,r0 + ldd r0,Z+8 + eor r10,r0 + ldd r0,Z+9 + eor r11,r0 + ldd r0,Z+10 + eor r12,r0 + ldd r0,Z+11 + eor r13,r0 + ldd r0,Z+12 + eor r14,r0 + ldd r0,Z+13 + eor r15,r0 + ldd r0,Z+14 + eor r24,r0 + ldd r0,Z+15 + eor r25,r0 + st X+,r2 + st X+,r3 + st X+,r6 + st X+,r7 + st X+,r10 + st X+,r11 + st X+,r14 + st X+,r15 + ldd r20,Y+17 + ldd r21,Y+18 + ldd r0,Z+16 + eor r20,r0 + ldd r0,Z+17 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+21 + ldd r21,Y+22 + ldd r0,Z+20 + eor r20,r0 + ldd r0,Z+21 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+25 + ldd r21,Y+26 + ldd r0,Z+24 + eor r20,r0 + ldd r0,Z+25 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+29 + ldd r21,Y+30 + ldd r0,Z+28 + eor r20,r0 + ldd r0,Z+29 + eor r21,r0 + st X+,r20 + st X+,r21 + st X+,r4 + st X+,r5 + st X+,r8 + st X+,r9 + st X+,r12 + st X+,r13 + st X+,r24 + st X+,r25 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r0,Z+18 + eor r20,r0 + ldd r0,Z+19 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+23 + ldd r21,Y+24 + ldd r0,Z+22 + eor r20,r0 + ldd r0,Z+23 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+27 + ldd r21,Y+28 + ldd r0,Z+26 + eor r20,r0 + ldd r0,Z+27 + eor r21,r0 + st X+,r20 + st X+,r21 + ldd r20,Y+31 + ldd r21,Y+32 + ldd r0,Z+30 + eor r20,r0 + ldd r0,Z+31 + eor r21,r0 + st X+,r20 + st X+,r21 + adiw r28,34 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size saturnin_decrypt_block, .-saturnin_decrypt_block + +#endif diff --git a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.c b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.c new file mode 100644 index 0000000..f4be50d --- /dev/null +++ 
b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (C) 2020 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "internal-saturnin.h"
+
+#if !defined(__AVR__)
+
+/* Round constants for various combinations of rounds and domain_sep */
+static uint32_t const saturnin_rc[] = {
+    /* RC_10_1 */
+    0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419,
+    0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d,
+    /* RC_10_2 */
+    0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d,
+    0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe,
+    /* RC_10_3 */
+    0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa,
+    0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4,
+    /* RC_10_4 */
+    0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8,
+    0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018,
+    /* RC_10_5 */
+    0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df,
+    0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752,
+    /* RC_10_6 */
+    0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b,
+    0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1,
+    /* RC_16_7 */
+    0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b,
+    0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87,
+    0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0,
+    0x386d94d8,
+    /* RC_16_8 */
+    0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78,
+    0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195,
+    0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611,
+    0x8eef9c3e
+};
+
+/* Loads a 32-bit word from the two halves of a 256-bit Saturnin input block */
+#define saturnin_load_word32(ptr) \
+    ((((uint32_t)((ptr)[17])) << 24) | \
+     (((uint32_t)((ptr)[16])) << 16) | \
+     (((uint32_t)((ptr)[1])) << 8) | \
+      ((uint32_t)((ptr)[0])))
+
+/* Stores a 32-bit word to the two halves of a 256-bit Saturnin output block */
+#define saturnin_store_word32(ptr, x) \
+    do { \
+        (ptr)[0] = (uint8_t)(x); \
+        (ptr)[1] = (uint8_t)((x) >> 8); \
+        (ptr)[16] = (uint8_t)((x) >> 16); \
+        (ptr)[17] = (uint8_t)((x) >> 24); \
+    } while (0)
+
+/* Rotate the 4-bit nibbles within a 16-bit word left */
+#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \
+    do { \
+        (a) = (((a) & (mask1)) << (bits1)) | \
+              (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \
+              (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \
+              (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \
+    } while (0)
+
+/* Rotate 16-bit subwords left */
+#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \
+    do { \
+        (a) = (((a) & (mask1)) << (bits1)) | \
+              (((a) & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \
+              (((a) & (((uint32_t)(mask2)) << 16)) << (bits2)) | \
+              (((a) & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \
+    } while (0)
+
+/**
+ * \brief XOR the key into the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state.
+ * \param x5 Sixth word of the bit-sliced state.
+ * \param x6 Seventh word of the bit-sliced state.
+ * \param x7 Eighth word of the bit-sliced state.
+ */
+#define saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7) \
+    do { \
+        x0 ^= ks->k[0]; \
+        x1 ^= ks->k[1]; \
+        x2 ^= ks->k[2]; \
+        x3 ^= ks->k[3]; \
+        x4 ^= ks->k[4]; \
+        x5 ^= ks->k[5]; \
+        x6 ^= ks->k[6]; \
+        x7 ^= ks->k[7]; \
+    } while (0)
+
+/**
+ * \brief XOR a rotated version of the key into the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state.
+ * \param x5 Sixth word of the bit-sliced state.
+ * \param x6 Seventh word of the bit-sliced state.
+ * \param x7 Eighth word of the bit-sliced state.
+ */
+#define saturnin_xor_key_rotated(x0, x1, x2, x3, x4, x5, x6, x7) \
+    do { \
+        x0 ^= ks->k[8]; \
+        x1 ^= ks->k[9]; \
+        x2 ^= ks->k[10]; \
+        x3 ^= ks->k[11]; \
+        x4 ^= ks->k[12]; \
+        x5 ^= ks->k[13]; \
+        x6 ^= ks->k[14]; \
+        x7 ^= ks->k[15]; \
+    } while (0)
+
+/**
+ * \brief Applies the Saturnin S-box to a bit-sliced set of nibbles.
+ *
+ * \param a First bit-slice.
+ * \param b Second bit-slice.
+ * \param c Third bit-slice.
+ * \param d Fourth bit-slice.
+ *
+ * The S-box also involves a rotation on the output words. We perform the
+ * rotation implicitly in the higher layers.
+ */
+#define saturnin_sbox(a, b, c, d) \
+    do { \
+        (a) ^= (b) & (c); \
+        (b) ^= (a) | (d); \
+        (d) ^= (b) | (c); \
+        (c) ^= (b) & (d); \
+        (b) ^= (a) | (c); \
+        (a) ^= (b) | (d); \
+    } while (0)
+
+/**
+ * \brief Applies the inverse of the Saturnin S-box to a set of nibbles.
+ *
+ * \param a First bit-slice.
+ * \param b Second bit-slice.
+ * \param c Third bit-slice.
+ * \param d Fourth bit-slice.
+ *
+ * The inverse of the S-box also involves a rotation on the input words.
+ * We perform the rotation implicitly in the higher layers.
+ */
+#define saturnin_sbox_inverse(a, b, c, d) \
+    do { \
+        (a) ^= (b) | (d); \
+        (b) ^= (a) | (c); \
+        (c) ^= (b) & (d); \
+        (d) ^= (b) | (c); \
+        (b) ^= (a) | (d); \
+        (a) ^= (b) & (c); \
+    } while (0)
+
+/* Helpers for MDS matrix operations */
+#define SWAP(a) (((a) << 16) | ((a) >> 16))
+#define MUL(x0, x1, x2, x3) \
+    do { \
+        temp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = temp ^ x0; \
+    } while (0)
+#define MULINV(x0, x1, x2, x3) \
+    do { \
+        temp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ temp; \
+    } while (0)
+
+/**
+ * \brief Applies the MDS matrix to the Saturnin state.
+ *
+ * \param x0 First word of the bit-sliced state.
+ * \param x1 Second word of the bit-sliced state.
+ * \param x2 Third word of the bit-sliced state.
+ * \param x3 Fourth word of the bit-sliced state.
+ * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_mds(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MUL(x4, x5, x6, x7); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + MUL(x0, x1, x2, x3); \ + MUL(x0, x1, x2, x3); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + } while (0) + +/** + * \brief Applies the inverse of the MDS matrix to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_mds_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + MULINV(x0, x1, x2, x3); \ + MULINV(x0, x1, x2, x3); \ + x6 ^= SWAP(x2); x7 ^= SWAP(x3); \ + x4 ^= SWAP(x0); x5 ^= SWAP(x1); \ + MULINV(x4, x5, x6, x7); \ + x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; \ + } while (0) + +/** + * \brief Applies the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x5, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x6, 0x7777U, 1, 0x1111, 3); \ + leftRotate4_N(x7, 0x7777U, 1, 0x1111, 3); \ + } while (0) + +/** + * \brief Applies the inverse of the slice permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_slice_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate4_N(x0, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x1, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x2, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x3, 0xFFFFU, 0, 0x3333, 2); \ + leftRotate4_N(x4, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x5, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x6, 0x1111U, 3, 0x7777, 1); \ + leftRotate4_N(x7, 0x1111U, 3, 0x7777, 1); \ + } while (0) + +/** + * \brief Applies the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. 
+ * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x5, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x6, 0x0FFFU, 4, 0x000F, 12); \ + leftRotate16_N(x7, 0x0FFFU, 4, 0x000F, 12); \ + } while (0) + +/** + * \brief Applies the inverse of the sheet permutation to the Saturnin state. + * + * \param x0 First word of the bit-sliced state. + * \param x1 Second word of the bit-sliced state. + * \param x2 Third word of the bit-sliced state. + * \param x3 Fourth word of the bit-sliced state. + * \param x4 Fifth word of the bit-sliced state. + * \param x5 Sixth word of the bit-sliced state. + * \param x6 Seventh word of the bit-sliced state. + * \param x7 Eighth word of the bit-sliced state. + */ +#define saturnin_sheet_inverse(x0, x1, x2, x3, x4, x5, x6, x7) \ + do { \ + leftRotate16_N(x0, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x1, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x2, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x3, 0xFFFFU, 0, 0x00FF, 8); \ + leftRotate16_N(x4, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x5, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x6, 0x000FU, 12, 0x0FFF, 4); \ + leftRotate16_N(x7, 0x000FU, 12, 0x0FFF, 4); \ + } while (0) + +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key) +{ + int index; + uint32_t temp; + for (index = 0; index < 16; index += 2) { + temp = saturnin_load_word32(key + index); + ks->k[index / 2] = temp; + ks->k[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | + ((temp >> 5) & 0x07FF07FFU); + } +} + +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
8 : 5; + const uint32_t *rc = saturnin_rc + domain; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Perform all encryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc += 2) { + /* Even rounds */ + saturnin_sbox(x0, x1, x2, x3); + saturnin_sbox(x4, x5, x6, x7); + saturnin_mds(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox(x1, x2, x3, x0); + saturnin_sbox(x7, x5, x4, x6); + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + + /* Odd rounds */ + saturnin_sbox(x2, x3, x0, x1); + saturnin_sbox(x6, x5, x7, x4); + saturnin_mds(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox(x3, x0, x1, x2); + saturnin_sbox(x4, x5, x6, x7); + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + + /* Correct the rotation of the second half before the next round */ + temp = x4; + x4 = x7; + x7 = x6; + x6 = temp; + } + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain) +{ + unsigned rounds = (domain >= SATURNIN_DOMAIN_16_7) ? 
8 : 5; + const uint32_t *rc = saturnin_rc + domain + (rounds - 1) * 2; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, temp; + + /* Load the input into local variables */ + x0 = saturnin_load_word32(input); + x1 = saturnin_load_word32(input + 2); + x2 = saturnin_load_word32(input + 4); + x3 = saturnin_load_word32(input + 6); + x4 = saturnin_load_word32(input + 8); + x5 = saturnin_load_word32(input + 10); + x6 = saturnin_load_word32(input + 12); + x7 = saturnin_load_word32(input + 14); + + /* Perform all decryption rounds, two at a time */ + for (; rounds > 0; --rounds, rc -= 2) { + /* Correct the rotation of the second half before the next round */ + temp = x6; + x6 = x7; + x7 = x4; + x4 = temp; + + /* Odd rounds */ + saturnin_xor_key(x0, x1, x2, x3, x7, x5, x4, x6); + x0 ^= rc[1]; + saturnin_sheet(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_mds_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sheet_inverse(x0, x1, x2, x3, x7, x5, x4, x6); + saturnin_sbox_inverse(x3, x0, x1, x2); + saturnin_sbox_inverse(x4, x5, x6, x7); + saturnin_mds_inverse(x3, x0, x1, x2, x4, x5, x6, x7); + saturnin_sbox_inverse(x2, x3, x0, x1); + saturnin_sbox_inverse(x6, x5, x7, x4); + + /* Even rounds */ + saturnin_xor_key_rotated(x2, x3, x0, x1, x6, x5, x7, x4); + x2 ^= rc[0]; + saturnin_slice(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_mds_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_slice_inverse(x2, x3, x0, x1, x6, x5, x7, x4); + saturnin_sbox_inverse(x1, x2, x3, x0); + saturnin_sbox_inverse(x7, x5, x4, x6); + saturnin_mds_inverse(x1, x2, x3, x0, x7, x5, x4, x6); + saturnin_sbox_inverse(x0, x1, x2, x3); + saturnin_sbox_inverse(x4, x5, x6, x7); + } + + /* XOR the key into the state */ + saturnin_xor_key(x0, x1, x2, x3, x4, x5, x6, x7); + + /* Store the local variables to the output buffer */ + saturnin_store_word32(output, x0); + saturnin_store_word32(output + 2, x1); + saturnin_store_word32(output + 4, x2); + saturnin_store_word32(output + 6, x3); + saturnin_store_word32(output + 8, x4); + saturnin_store_word32(output + 10, x5); + saturnin_store_word32(output + 12, x6); + saturnin_store_word32(output + 14, x7); +} + +#endif /* !__AVR__ */ diff --git a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.h b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.h new file mode 100644 index 0000000..8af07c3 --- /dev/null +++ b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/internal-saturnin.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2020 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef LW_INTERNAL_SATURNIN_H +#define LW_INTERNAL_SATURNIN_H + +/** + * \file internal-saturnin.h + * \brief Saturnin block cipher. + * + * References: https://project.inria.fr/saturnin/ + */ + +#include "internal-util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Size of a Saturnin block in bytes. + */ +#define SATURNIN_BLOCK_SIZE 32 + +/** + * \brief Domain separator index 1 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_1 0 + +/** + * \brief Domain separator index 2 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_2 10 + +/** + * \brief Domain separator index 3 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_3 20 + +/** + * \brief Domain separator index 4 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_4 30 + +/** + * \brief Domain separator index 5 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_5 40 + +/** + * \brief Domain separator index 6 for the 10-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_10_6 50 + +/** + * \brief Domain separator index 7 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_7 60 + +/** + * \brief Domain separator index 8 for the 16-round version of Saturnin. + */ +#define SATURNIN_DOMAIN_16_8 76 + +/** + * \brief Structure of the key schedule for Saturnin. + */ +typedef struct +{ + /** Pre-computed round keys for Saturnin */ + uint32_t k[16]; + +} saturnin_key_schedule_t; + +/** + * \brief Sets up a key schedule for Saturnin. + * + * \param ks Points to the key schedule to initialize. + * \param key Points to the 32 bytes of the key data. + */ +void saturnin_setup_key + (saturnin_key_schedule_t *ks, const unsigned char *key); + +/** + * \brief Encrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place encryption. + */ +void saturnin_encrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +/** + * \brief Decrypts a 256-bit block with Saturnin. + * + * \param ks Points to the Saturnin key schedule. + * \param output Output buffer which must be at least 32 bytes in length. + * \param input Input buffer which must be at least 32 bytes in length. + * \param domain Domain separator and round count indicator. + * + * The \a input and \a output buffers can be the same buffer for + * in-place decryption. 
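+ *
+ * A minimal usage sketch (illustrative only; \c key, \c ct and \c pt are
+ * hypothetical 32-byte buffers supplied by the caller):
+ * \code
+ * saturnin_key_schedule_t ks;
+ * saturnin_setup_key(&ks, key);
+ * saturnin_decrypt_block(&ks, pt, ct, SATURNIN_DOMAIN_10_1);
+ * \endcode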
+ */ +void saturnin_decrypt_block + (const saturnin_key_schedule_t *ks, unsigned char *output, + const unsigned char *input, unsigned domain); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/saturnin.c b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/saturnin.c index 734fc69..d2bd2cc 100644 --- a/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/saturnin.c +++ b/saturnin/Implementations/crypto_hash/saturninhashv2/rhys/saturnin.c @@ -21,7 +21,7 @@ */ #include "saturnin.h" -#include "internal-util.h" +#include "internal-saturnin.h" #include aead_cipher_t const saturnin_cipher = { @@ -57,440 +57,22 @@ aead_hash_algorithm_t const saturnin_hash_algorithm = { 0 /* squeeze */ }; -/* Round constant tables for various combinations of rounds and domain_sep */ -static uint32_t const RC_10_1[] = { - 0x4eb026c2, 0x90595303, 0xaa8fe632, 0xfe928a92, 0x4115a419, - 0x93539532, 0x5db1cc4e, 0x541515ca, 0xbd1f55a8, 0x5a6e1a0d -}; -static uint32_t const RC_10_2[] = { - 0x4e4526b5, 0xa3565ff0, 0x0f8f20d8, 0x0b54bee1, 0x7d1a6c9d, - 0x17a6280a, 0xaa46c986, 0xc1199062, 0x182c5cde, 0xa00d53fe -}; -static uint32_t const RC_10_3[] = { - 0x4e162698, 0xb2535ba1, 0x6c8f9d65, 0x5816ad30, 0x691fd4fa, - 0x6bf5bcf9, 0xf8eb3525, 0xb21decfa, 0x7b3da417, 0xf62c94b4 -}; -static uint32_t const RC_10_4[] = { - 0x4faf265b, 0xc5484616, 0x45dcad21, 0xe08bd607, 0x0504fdb8, - 0x1e1f5257, 0x45fbc216, 0xeb529b1f, 0x52194e32, 0x5498c018 -}; -static uint32_t const RC_10_5[] = { - 0x4ffc2676, 0xd44d4247, 0x26dc109c, 0xb3c9c5d6, 0x110145df, - 0x624cc6a4, 0x17563eb5, 0x9856e787, 0x3108b6fb, 0x02b90752 -}; -static uint32_t const RC_10_6[] = { - 0x4f092601, 0xe7424eb4, 0x83dcd676, 0x460ff1a5, 0x2d0e8d5b, - 0xe6b97b9c, 0xe0a13b7d, 0x0d5a622f, 0x943bbf8d, 0xf8da4ea1 -}; -static uint32_t const RC_16_7[] = { - 0x3fba180c, 0x563ab9ab, 0x125ea5ef, 0x859da26c, 0xb8cf779b, - 0x7d4de793, 0x07efb49f, 0x8d525306, 0x1e08e6ab, 0x41729f87, - 0x8c4aef0a, 0x4aa0c9a7, 0xd93a95ef, 0xbb00d2af, 0xb62c5bf0, - 0x386d94d8 -}; -static uint32_t const RC_16_8[] = { - 0x3c9b19a7, 0xa9098694, 0x23f878da, 0xa7b647d3, 0x74fc9d78, - 0xeacaae11, 0x2f31a677, 0x4cc8c054, 0x2f51ca05, 0x5268f195, - 0x4f5b8a2b, 0xf614b4ac, 0xf1d95401, 0x764d2568, 0x6a493611, - 0x8eef9c3e -}; - -/* Rotate the 4-bit nibbles within a 16-bit word left */ -#define leftRotate4_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (4 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (4 - (bits2))); \ - } while (0) - -/* Rotate 16-bit subwords left */ -#define leftRotate16_N(a, mask1, bits1, mask2, bits2) \ - do { \ - uint32_t _temp = (a); \ - (a) = ((_temp & (mask1)) << (bits1)) | \ - ((_temp & ((mask1) ^ (uint32_t)0xFFFFU)) >> (16 - (bits1))) | \ - ((_temp & (((uint32_t)(mask2)) << 16)) << (bits2)) | \ - ((_temp & (((uint32_t)((mask2)) << 16) ^ 0xFFFF0000U)) >> (16 - (bits2))); \ - } while (0) - -/* XOR the SATURNIN state with the key */ -#define saturnin_xor_key() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index]; \ - } while (0) - -/* XOR the SATURNIN state with a rotated version of the key */ -#define saturnin_xor_key_rotated() \ - do { \ - for (index = 0; index < 8; ++index) \ - S[index] ^= K[index + 8]; \ - } while (0) - -/* Apply an SBOX layer for SATURNIN - definition from the specification */ -#define 
S_LAYER(a, b, c, d) \ - do { \ - (a) ^= (b) & (c); \ - (b) ^= (a) | (d); \ - (d) ^= (b) | (c); \ - (c) ^= (b) & (d); \ - (b) ^= (a) | (c); \ - (a) ^= (b) | (d); \ - } while (0) - -/* Apply an SBOX layer for SATURNIN in reverse */ -#define S_LAYER_INVERSE(a, b, c, d) \ - do { \ - (a) ^= (b) | (d); \ - (b) ^= (a) | (c); \ - (c) ^= (b) & (d); \ - (d) ^= (b) | (c); \ - (b) ^= (a) | (d); \ - (a) ^= (b) & (c); \ - } while (0) - -/** - * \brief Applies the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - a = S[0]; b = S[1]; c = S[2]; d = S[3]; - S_LAYER(a, b, c, d); - S[0] = b; S[1] = c; S[2] = d; S[3] = a; - - /* PI_1 on the second half of the state */ - a = S[4]; b = S[5]; c = S[6]; d = S[7]; - S_LAYER(a, b, c, d); - S[4] = d; S[5] = b; S[6] = a; S[7] = c; -} - -/** - * \brief Applies the inverse of the SBOX to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sbox_inverse(uint32_t S[8]) -{ - uint32_t a, b, c, d; - - /* PI_0 on the first half of the state */ - b = S[0]; c = S[1]; d = S[2]; a = S[3]; - S_LAYER_INVERSE(a, b, c, d); - S[0] = a; S[1] = b; S[2] = c; S[3] = d; - - /* PI_1 on the second half of the state */ - d = S[4]; b = S[5]; a = S[6]; c = S[7]; - S_LAYER_INVERSE(a, b, c, d); - S[4] = a; S[5] = b; S[6] = c; S[7] = d; -} - -/** - * \brief Applies the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the MDS matrix to the state */ - #define SWAP(a) (((a) << 16) | ((a) >> 16)) - #define MUL(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x0; x0 = x1; x1 = x2; x2 = x3; x3 = tmp ^ x0; \ - } while (0) - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MUL(x4, x5, x6, x7, tmp); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - MUL(x0, x1, x2, x3, tmp); - MUL(x0, x1, x2, x3, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the inverse of the MDS matrix to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_mds_inverse(uint32_t S[8]) -{ - uint32_t x0, x1, x2, x3, x4, x5, x6, x7; - uint32_t tmp; - - /* Load the state into temporary working variables */ - x0 = S[0]; x1 = S[1]; x2 = S[2]; x3 = S[3]; - x4 = S[4]; x5 = S[5]; x6 = S[6]; x7 = S[7]; - - /* Apply the inverse of the MDS matrix to the state */ - #define MULINV(x0, x1, x2, x3, tmp) \ - do { \ - tmp = x3; x3 = x2; x2 = x1; x1 = x0; x0 = x1 ^ tmp; \ - } while (0) - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - MULINV(x0, x1, x2, x3, tmp); - MULINV(x0, x1, x2, x3, tmp); - x6 ^= SWAP(x2); x7 ^= SWAP(x3); - x4 ^= SWAP(x0); x5 ^= SWAP(x1); - MULINV(x4, x5, x6, x7, tmp); - x0 ^= x4; x1 ^= x5; x2 ^= x6; x3 ^= x7; - - /* Store the temporary working variables back into the state */ - S[0] = x0; S[1] = x1; S[2] = x2; S[3] = x3; - S[4] = x4; S[5] = x5; S[6] = x6; S[7] = x7; -} - -/** - * \brief Applies the slice permutation to the SATURNIN state. - * - * \param S The state. 
- */ -static void saturnin_slice(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[5], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[6], 0x7777U, 1, 0x1111, 3); - leftRotate4_N(S[7], 0x7777U, 1, 0x1111, 3); -} - -/** - * \brief Applies the inverse of the slice permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_slice_inverse(uint32_t S[8]) -{ - leftRotate4_N(S[0], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[1], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[2], 0xFFFFU, 0, 0x3333, 2); - leftRotate4_N(S[3], 0xFFFFU, 0, 0x3333, 2); - - leftRotate4_N(S[4], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[5], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[6], 0x1111U, 3, 0x7777, 1); - leftRotate4_N(S[7], 0x1111U, 3, 0x7777, 1); -} - -/** - * \brief Applies the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[5], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[6], 0x0FFFU, 4, 0x000F, 12); - leftRotate16_N(S[7], 0x0FFFU, 4, 0x000F, 12); -} - -/** - * \brief Applies the inverse of the sheet permutation to the SATURNIN state. - * - * \param S The state. - */ -static void saturnin_sheet_inverse(uint32_t S[8]) -{ - leftRotate16_N(S[0], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[1], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[2], 0xFFFFU, 0, 0x00FF, 8); - leftRotate16_N(S[3], 0xFFFFU, 0, 0x00FF, 8); - - leftRotate16_N(S[4], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[5], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[6], 0x000FU, 12, 0x0FFF, 4); - leftRotate16_N(S[7], 0x000FU, 12, 0x0FFF, 4); -} - -/** - * \brief Encrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Ciphertext output block, 32 bytes. - * \param input Plaintext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_decrypt() - */ -static void saturnin_block_encrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Perform all encryption rounds */ - for (; rounds > 0; rounds -= 2, RC += 2) { - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_slice(S); - saturnin_mds(S); - saturnin_slice_inverse(S); - S[0] ^= RC[0]; - saturnin_xor_key_rotated(); - - saturnin_sbox(S); - saturnin_mds(S); - saturnin_sbox(S); - saturnin_sheet(S); - saturnin_mds(S); - saturnin_sheet_inverse(S); - S[0] ^= RC[1]; - saturnin_xor_key(); - } - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - -/** - * \brief Decrypts a 256-bit block with the SATURNIN block cipher. - * - * \param output Plaintext output block, 32 bytes. - * \param input Ciphertext input block, 32 bytes. - * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. - * - * The \a input and \a output buffers can be the same. 
- * - * \sa saturnin_block_encrypt() - */ -static void saturnin_block_decrypt - (unsigned char *output, const unsigned char *input, - const unsigned char *key, unsigned rounds, const uint32_t *RC) -{ - uint32_t K[16]; - uint32_t S[8]; - uint32_t temp; - unsigned index; - - /* Unpack the key and the input block */ - for (index = 0; index < 16; index += 2) { - temp = ((uint32_t)(key[index])) | - (((uint32_t)(key[index + 1])) << 8) | - (((uint32_t)(key[index + 16])) << 16) | - (((uint32_t)(key[index + 17])) << 24); - K[index / 2] = temp; - K[8 + (index / 2)] = ((temp & 0x001F001FU) << 11) | - ((temp >> 5) & 0x07FF07FFU); - S[index / 2] = ((uint32_t)(input[index])) | - (((uint32_t)(input[index + 1])) << 8) | - (((uint32_t)(input[index + 16])) << 16) | - (((uint32_t)(input[index + 17])) << 24); - } - - /* Perform all decryption rounds */ - RC += rounds - 2; - for (; rounds > 0; rounds -= 2, RC -= 2) { - saturnin_xor_key(); - S[0] ^= RC[1]; - saturnin_sheet(S); - saturnin_mds_inverse(S); - saturnin_sheet_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - - saturnin_xor_key_rotated(); - S[0] ^= RC[0]; - saturnin_slice(S); - saturnin_mds_inverse(S); - saturnin_slice_inverse(S); - saturnin_sbox_inverse(S); - saturnin_mds_inverse(S); - saturnin_sbox_inverse(S); - } - - /* XOR the key into the state */ - saturnin_xor_key(); - - /* Encode the state into the output block */ - for (index = 0; index < 16; index += 2) { - temp = S[index / 2]; - output[index] = (uint8_t)temp; - output[index + 1] = (uint8_t)(temp >> 8); - output[index + 16] = (uint8_t)(temp >> 16); - output[index + 17] = (uint8_t)(temp >> 24); - } -} - /** * \brief Encrypts a 256-bit block with the SATURNIN block cipher and * then XOR's itself to generate a new key. * * \param block Block to be encrypted and then XOR'ed with itself. * \param key Points to the 32 byte key for the block cipher. - * \param rounds Number of rounds to perform. - * \param RC Round constants to use for domain separation. + * \param domain Domain separator and round counter. */ -void saturnin_block_encrypt_xor - (const unsigned char *block, unsigned char *key, - unsigned rounds, const uint32_t *RC) +static void saturnin_block_encrypt_xor + (const unsigned char *block, unsigned char *key, unsigned domain) { - unsigned char temp[32]; - saturnin_block_encrypt(temp, block, key, rounds, RC); - lw_xor_block_2_src(key, block, temp, 32); + saturnin_key_schedule_t ks; + unsigned char *temp = (unsigned char *)ks.k; /* Reuse some stack space */ + saturnin_setup_key(&ks, key); + saturnin_encrypt_block(&ks, temp, block, domain); + lw_xor_block_2_src(key, block, temp, SATURNIN_BLOCK_SIZE); } /** @@ -499,20 +81,20 @@ void saturnin_block_encrypt_xor * \param c Output ciphertext buffer. * \param m Input plaintext buffer. * \param mlen Length of the plaintext in bytes. - * \param k Points to the 32-byte key. + * \param ks Points to the key schedule. * \param block Points to the pre-formatted nonce block. */ static void saturnin_ctr_encrypt (unsigned char *c, const unsigned char *m, unsigned long long mlen, - const unsigned char *k, unsigned char *block) + const saturnin_key_schedule_t *ks, unsigned char *block) { /* Note: Specification requires a 95-bit counter but we only use 32-bit. * This limits the maximum packet size to 128Gb. 
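     * (2^32 counter values times 32 bytes per block = 2^37 bytes).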
That should be OK */ uint32_t counter = 1; - unsigned char out[32]; + unsigned char out[SATURNIN_BLOCK_SIZE]; while (mlen >= 32) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, 32); c += 32; m += 32; @@ -521,7 +103,7 @@ static void saturnin_ctr_encrypt } if (mlen > 0) { be_store_word32(block + 28, counter); - saturnin_block_encrypt(out, block, k, 10, RC_10_1); + saturnin_encrypt_block(ks, out, block, SATURNIN_DOMAIN_10_1); lw_xor_block_2_src(c, out, m, (unsigned)mlen); } } @@ -533,18 +115,17 @@ static void saturnin_ctr_encrypt * \param block Temporary block of 32 bytes from the caller. * \param m Points to the message to be authenticated. * \param mlen Length of the message to be authenticated in bytes. - * \param rounds Number of rounds to perform. - * \param RC1 Round constants to use for domain separation on full blocks. - * \param RC2 Round constants to use for domain separation on the last block. + * \param domain1 Round count and domain separator for full blocks. + * \param domain2 Round count and domain separator for the last block. */ static void saturnin_authenticate (unsigned char *tag, unsigned char *block, const unsigned char *m, unsigned long long mlen, - unsigned rounds, const uint32_t *RC1, const uint32_t *RC2) + unsigned domain1, unsigned domain2) { unsigned temp; while (mlen >= 32) { - saturnin_block_encrypt_xor(m, tag, rounds, RC1); + saturnin_block_encrypt_xor(m, tag, domain1); m += 32; mlen -= 32; } @@ -552,7 +133,7 @@ static void saturnin_authenticate memcpy(block, m, temp); block[temp] = 0x80; memset(block + temp + 1, 0, 31 - temp); - saturnin_block_encrypt_xor(block, tag, rounds, RC2); + saturnin_block_encrypt_xor(block, tag, domain2); } int saturnin_aead_encrypt @@ -563,6 +144,7 @@ int saturnin_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char *tag; (void)nsec; @@ -576,17 +158,20 @@ int saturnin_aead_encrypt memset(block + 17, 0, 15); /* Encrypt the plaintext in counter mode to produce the ciphertext */ - saturnin_ctr_encrypt(c, m, mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(c, m, mlen, &ks, block); /* Set the counter back to zero and then encrypt the nonce */ tag = c + mlen; memcpy(tag, k, 32); memset(block + 17, 0, 15); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, c, mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); return 0; } @@ -598,6 +183,7 @@ int saturnin_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned char tag[32]; (void)nsec; @@ -614,17 +200,20 @@ int saturnin_aead_decrypt /* Encrypt the nonce to initialize the authentication phase */ memcpy(tag, k, 32); - saturnin_block_encrypt_xor(block, tag, 10, RC_10_2); + saturnin_block_encrypt_xor(block, tag, SATURNIN_DOMAIN_10_2); /* Authenticate the associated data and the ciphertext */ - saturnin_authenticate(tag, block, ad, adlen, 10, RC_10_2, RC_10_3); - saturnin_authenticate(tag, block, 
c, *mlen, 10, RC_10_4, RC_10_5); + saturnin_authenticate + (tag, block, ad, adlen, SATURNIN_DOMAIN_10_2, SATURNIN_DOMAIN_10_3); + saturnin_authenticate + (tag, block, c, *mlen, SATURNIN_DOMAIN_10_4, SATURNIN_DOMAIN_10_5); /* Decrypt the ciphertext in counter mode to produce the plaintext */ memcpy(block, npub, 16); block[16] = 0x80; memset(block + 17, 0, 15); - saturnin_ctr_encrypt(m, c, *mlen, k, block); + saturnin_setup_key(&ks, k); + saturnin_ctr_encrypt(m, c, *mlen, &ks, block); /* Check the authentication tag at the end of the message */ return aead_check_tag @@ -639,6 +228,7 @@ int saturnin_short_aead_encrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned temp; (void)nsec; @@ -656,7 +246,8 @@ int saturnin_short_aead_encrypt memset(block + 17 + temp, 0, 15 - temp); /* Encrypt the input block to produce the output ciphertext */ - saturnin_block_encrypt(c, block, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_encrypt_block(&ks, c, block, SATURNIN_DOMAIN_10_6); *clen = 32; return 0; } @@ -669,6 +260,7 @@ int saturnin_short_aead_decrypt const unsigned char *npub, const unsigned char *k) { + saturnin_key_schedule_t ks; unsigned char block[32]; unsigned check1, check2, len; int index, result; @@ -682,7 +274,8 @@ int saturnin_short_aead_decrypt return -1; /* Decrypt the ciphertext block */ - saturnin_block_decrypt(block, c, k, 10, RC_10_6); + saturnin_setup_key(&ks, k); + saturnin_decrypt_block(&ks, block, c, SATURNIN_DOMAIN_10_6); /* Verify that the output block starts with the nonce and that it is * padded correctly. We need to do this very carefully to avoid leaking @@ -723,7 +316,8 @@ int saturnin_hash unsigned char tag[32]; unsigned char block[32]; memset(tag, 0, sizeof(tag)); - saturnin_authenticate(tag, block, in, inlen, 16, RC_16_7, RC_16_8); + saturnin_authenticate + (tag, block, in, inlen, SATURNIN_DOMAIN_16_7, SATURNIN_DOMAIN_16_8); memcpy(out, tag, 32); return 0; } @@ -752,12 +346,14 @@ void saturnin_hash_update state->s.count = 0; in += temp; inlen -= temp; - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_7); } /* Process full blocks that are aligned at state->s.count == 0 */ while (inlen >= 32) { - saturnin_block_encrypt_xor(in, state->s.hash, 16, RC_16_7); + saturnin_block_encrypt_xor + (in, state->s.hash, SATURNIN_DOMAIN_16_7); in += 32; inlen -= 32; } @@ -776,6 +372,7 @@ void saturnin_hash_finalize memset(state->s.block + state->s.count + 1, 0, 31 - state->s.count); /* Generate the final hash value */ - saturnin_block_encrypt_xor(state->s.block, state->s.hash, 16, RC_16_8); + saturnin_block_encrypt_xor + (state->s.block, state->s.hash, SATURNIN_DOMAIN_16_8); memcpy(out, state->s.hash, 32); } diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. 
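         * Bits 0..3 of the 6-bit LFSR value rc are XORed into the first
         * schedule word and bits 4..5 into the second.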
* The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
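+ *
+ * The word rotation that normally finishes MixColumns is performed
+ * implicitly: each successive invocation rotates the (s0, s1, s2, s3)
+ * arguments by one position, as in this sketch of one unrolled group
+ * (matching the encryption loop below):
+ * \code
+ * skinny_128_384_round_tk_full(s0, s1, s2, s3, 0);
+ * skinny_128_384_round_tk_full(s3, s0, s1, s2, 1);
+ * skinny_128_384_round_tk_full(s2, s3, s0, s1, 0);
+ * skinny_128_384_round_tk_full(s1, s2, s3, s0, 1);
+ * \endcode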
+ */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
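+ *
+ * The state words rotate in the opposite direction to encryption and
+ * the 6-bit round-constant LFSR is stepped in reverse:
+ * rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20) undoes one forward
+ * step of rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01
+ * (masked to six bits).  The TK2 and TK3 LFSRs also swap roles here
+ * because each is the inverse of the other.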
+ */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
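+ *
+ * \note Editorial sketch, not part of the original patch: the forward
+ * round-constant update in this macro and the backward update used by
+ * the unrolled inverse rounds are mutual inverses over the 6-bit LFSR
+ * state, which a brute-force check over all 64 values confirms:
+ *
+ * \code
+ * #include <assert.h>
+ * int main(void)
+ * {
+ *     unsigned rc;
+ *     for (rc = 0; rc < 64; ++rc) {
+ *         unsigned fwd = ((rc << 1) ^ ((rc >> 5) & 0x01) ^
+ *                        ((rc >> 4) & 0x01) ^ 0x01) & 0x3F;
+ *         unsigned back = (fwd >> 1) ^ (((fwd << 5) ^ fwd ^ 0x20) & 0x20);
+ *         assert(back == rc);
+ *     }
+ *     return 0;
+ * }
+ * \endcode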
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
     }
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
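+        /* Editorial aside, not part of the original patch: each byte of
+         * TK2 only sits in the two rows that receive the LFSR on every
+         * second round, so one application of LFSR2 per two rounds is
+         * enough to fast-forward a word of TK2 to its state at the end
+         * of the schedule.  The same idea as a hypothetical stand-alone
+         * helper:
+         *
+         *     static uint32_t ff_tk2_word(uint32_t w, unsigned rounds)
+         *     {
+         *         unsigned r;
+         *         for (r = 0; r < rounds; r += 2)
+         *             skinny128_LFSR2(w);
+         *         return w;
+         *     }
+         */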
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk296128v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
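+ *
+ * \note Editorial sketch, not part of the original patch: stepping the
+ * schedule pointer by 8 words per unrolled block while indexing with
+ * \a offset reads exactly the same words as the old one-round-at-a-time
+ * schedule += 2, since round r = 4*b + o reads word 8*b + 2*o = 2*r.
+ * The same arithmetic explains why decryption now starts at
+ * ks->k[ROUNDS * 2 - 8] with offsets 3 down to 0 and steps by -8:
+ *
+ * \code
+ * #include <assert.h>
+ * int main(void)
+ * {
+ *     unsigned r, o, base = 0;
+ *     for (r = 0; r < 56; r += 4) {    /* 56 == SKINNY_128_384_ROUNDS */
+ *         for (o = 0; o < 4; ++o)
+ *             assert(base + o * 2 == (r + o) * 2);
+ *         base += 8;                   /* schedule += 8 */
+ *     }
+ *     return 0;
+ * }
+ * \endcode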
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
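+ *
+ * \note Editorial sketch, not part of the original patch: the mix step
+ * in these macros omits the usual word rotation; the callers compensate
+ * by rotating the argument names, invoking the next round as
+ * round(s3, s0, s1, s2) and so on.  A minimal check that the two forms
+ * agree:
+ *
+ * \code
+ * #include <assert.h>
+ * #include <stdint.h>
+ * int main(void)
+ * {
+ *     uint32_t a0 = 1, a1 = 2, a2 = 4, a3 = 8, temp;  /* any state */
+ *     uint32_t b0 = 1, b1 = 2, b2 = 4, b3 = 8;
+ *     /* classic mix with the trailing word rotation */
+ *     a1 ^= a2; a2 ^= a0; temp = a3 ^ a2;
+ *     a3 = a2; a2 = a1; a1 = a0; a0 = temp;
+ *     /* rotation-free mix used by the unrolled macros */
+ *     b1 ^= b2; b2 ^= b0; b3 ^= b2;
+ *     /* reading the words through rotated names gives the same state */
+ *     assert(a0 == b3 && a1 == b0 && a2 == b1 && a3 == b2);
+ *     return 0;
+ * }
+ * \endcode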
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
    /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
#else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
     }
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
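+        /* Editorial aside, not part of the original patch: the unrolled
+         * decryption loop below advances the state names by one position
+         * per round where encryption advances them by three, so each
+         * inverse round lands on exactly the words its forward
+         * counterpart produced; the two renamings cancel modulo 4:
+         *
+         *     for (i = 0; i < 4; ++i)
+         *         assert((i + 3 + 1) % 4 == i);
+         */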
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk29664v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
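+ *
+ * \note Editorial sketch, not part of the original patch: the tk_full
+ * variant below undoes the tweakey schedule by applying LFSR3 to TK2 and
+ * LFSR2 to TK3, which works because the two LFSRs invert one another on
+ * every byte.  Assuming the byte-level definitions from the SKINNY
+ * specification:
+ *
+ * \code
+ * #include <assert.h>
+ * #include <stdint.h>
+ * static uint8_t lfsr2(uint8_t x)
+ * {
+ *     return (uint8_t)(((x << 1) & 0xFE) ^ (((x >> 7) ^ (x >> 5)) & 0x01));
+ * }
+ * static uint8_t lfsr3(uint8_t x)
+ * {
+ *     return (uint8_t)(((x >> 1) & 0x7F) ^ (((x << 7) ^ (x << 1)) & 0x80));
+ * }
+ * int main(void)
+ * {
+ *     unsigned x;
+ *     for (x = 0; x < 256; ++x) {
+ *         assert(lfsr3(lfsr2((uint8_t)x)) == x);
+ *         assert(lfsr2(lfsr3((uint8_t)x)) == x);
+ *     }
+ *     return 0;
+ * }
+ * \endcode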
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+        /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */
         skinny128_LFSR2(TK2[0]);
         skinny128_LFSR2(TK2[1]);
         skinny128_LFSR2(TK2[2]);
@@ -263,50 +420,20 @@
     }
 #endif
 
-    /* Perform all decryption rounds */
-    for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) {
-        /* Inverse permutation on TK1 for this round */
-        skinny128_inv_permute_tk(TK1);
+    /* Perform all decryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_inv_permute_tk(TK2);
-        skinny128_inv_permute_tk(TK3);
-        skinny128_LFSR3(TK2[2]);
-        skinny128_LFSR3(TK2[3]);
-        skinny128_LFSR2(TK3[2]);
-        skinny128_LFSR2(TK3[3]);
-#endif
-
-        /* Inverse mix of the columns */
-        temp = s3;
-        s3 = s0;
-        s0 = s1;
-        s1 = s2;
-        s3 ^= temp;
-        s2 = temp ^ s0;
-        s1 ^= s2;
-
-        /* Inverse shift of the rows */
-        s1 = leftRotate24(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate8(s3);
-
-        /* Apply the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
-        s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4);
+        skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1);
+        skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0);
+        skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1);
+        skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0);
 #else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-        schedule -= 2;
+        skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+        skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+        skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+        skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+        schedule -= 8;
 #endif
-        s2 ^= 0x02;
-
-        /* Apply the inverse of the S-box to all bytes in the state */
-        skinny128_inv_sbox(s0);
-        skinny128_inv_sbox(s1);
-        skinny128_inv_sbox(s2);
-        skinny128_inv_sbox(s3);
     }
 
     /* Pack the result into the output buffer */
@@ -316,6 +443,57 @@ void skinny_128_384_decrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset)
+ * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2().
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state.
+ * \param s2 Third word of the state.
+ * \param s3 Fourth word of the state.
+ * \param half 0 for the bottom half and 1 for the top half of the TK values.
+ * \param offset Offset between 0 and 3 of the current unrolled round.
+ */
+#if SKINNY_128_SMALL_SCHEDULE
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+    skinny_128_384_round_tk_full(s0, s1, s2, s3, half)
+#else /* !SKINNY_128_SMALL_SCHEDULE */
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \
+        s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \
+              TK2[half * 2 + 1]; \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
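+ *
+ * \note An editorial sketch of the round-constant update performed
+ * inside this macro; starting from rc = 0 the 6-bit LFSR produces
+ * the standard SKINNY constant sequence:
+ *
+ * \code
+ * rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
+ * rc &= 0x3F;  /* 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3E, 0x3D, ... */
+ * \endcode
+ *
+ * The low nibble of rc is XORed into the first state word and the
+ * high nibble into the second.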
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
 #else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
     }
 
    /* Pack the result into the output buffer */
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk3128128v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
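+ *
+ * \note An editorial illustration of the half parameter, not extra
+ * build code: the full tweakey permutation is split across two
+ * consecutive rounds, so each round only touches the pair of words
+ * selected by (1 - half):
+ *
+ * \code
+ * skinny128_inv_permute_tk_half(TK1[2], TK1[3]);  /* half == 0 */
+ * skinny128_inv_permute_tk_half(TK1[0], TK1[1]);  /* half == 1 */
+ * \endcode
+ *
+ * Alternating half across the unrolled calls therefore walks the
+ * inverse permutation through the whole of TK1.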
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+        /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */
         skinny128_LFSR2(TK2[0]);
         skinny128_LFSR2(TK2[1]);
         skinny128_LFSR2(TK2[2]);
@@ -263,50 +420,20 @@
     }
 #endif
 
-    /* Perform all decryption rounds */
-    for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) {
-        /* Inverse permutation on TK1 for this round */
-        skinny128_inv_permute_tk(TK1);
+    /* Perform all decryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_inv_permute_tk(TK2);
-        skinny128_inv_permute_tk(TK3);
-        skinny128_LFSR3(TK2[2]);
-        skinny128_LFSR3(TK2[3]);
-        skinny128_LFSR2(TK3[2]);
-        skinny128_LFSR2(TK3[3]);
-#endif
-
-        /* Inverse mix of the columns */
-        temp = s3;
-        s3 = s0;
-        s0 = s1;
-        s1 = s2;
-        s3 ^= temp;
-        s2 = temp ^ s0;
-        s1 ^= s2;
-
-        /* Inverse shift of the rows */
-        s1 = leftRotate24(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate8(s3);
-
-        /* Apply the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20);
-        s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4);
+        skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1);
+        skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0);
+        skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1);
+        skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0);
 #else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-        schedule -= 2;
+        skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+        skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+        skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+        skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+        schedule -= 8;
 #endif
-        s2 ^= 0x02;
-
-        /* Apply the inverse of the S-box to all bytes in the state */
-        skinny128_inv_sbox(s0);
-        skinny128_inv_sbox(s1);
-        skinny128_inv_sbox(s2);
-        skinny128_inv_sbox(s3);
     }
 
     /* Pack the result into the output buffer */
@@ -316,6 +443,57 @@ void skinny_128_384_decrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset)
+ * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2().
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state.
+ * \param s2 Third word of the state.
+ * \param s3 Fourth word of the state.
+ * \param half 0 for the bottom half and 1 for the top half of the TK values.
+ * \param offset Offset between 0 and 3 of the current unrolled round.
+ */
+#if SKINNY_128_SMALL_SCHEDULE
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+    skinny_128_384_round_tk_full(s0, s1, s2, s3, half)
+#else /* !SKINNY_128_SMALL_SCHEDULE */
+#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \
+        s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \
+              TK2[half * 2 + 1]; \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB.
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
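+ *
+ * \note Skinny-128-256 has no TK3, so the subkey XOR inside this
+ * macro reduces to the selected TK1/TK2 halves plus the round
+ * constant, as in this extract from the body below:
+ *
+ * \code
+ * s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F);
+ * s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4);
+ * \endcode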
+ */
+#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \
+    do { \
+        /* Apply the S-box to all bytes in the state */ \
+        skinny128_sbox(s0); \
+        skinny128_sbox(s1); \
+        skinny128_sbox(s2); \
+        skinny128_sbox(s3); \
+        \
+        /* XOR the round constant and the subkey for this round */ \
+        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \
+        rc &= 0x3F; \
+        s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \
+        s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \
+        s2 ^= 0x02; \
+        \
+        /* Shift the cells in the rows right, which moves the cell \
+         * values up closer to the MSB. That is, we do a left rotate \
+         * on the word to rotate the cells in the word right */ \
+        s1 = leftRotate8(s1); \
+        s2 = leftRotate16(s2); \
+        s3 = leftRotate24(s3); \
+        \
+        /* Mix the columns, but don't rotate the words yet */ \
+        s1 ^= s2; \
+        s2 ^= s0; \
+        s3 ^= s2; \
+        \
+        /* Permute TK1 and TK2 in-place for the next round */ \
+        skinny128_permute_tk_half \
+            (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \
+        skinny128_permute_tk_half \
+            (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2]); \
+        skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \
+    } while (0)
+
 void skinny_128_256_encrypt
     (const skinny_128_256_key_schedule_t *ks, unsigned char *output,
      const unsigned char *input)
@@ -549,7 +724,6 @@ void skinny_128_256_encrypt
 #else
     const uint32_t *schedule = ks->k;
 #endif
-    uint32_t temp;
     unsigned round;
 
     /* Unpack the input block into the state array */
@@ -570,50 +744,19 @@ void skinny_128_256_encrypt
     TK2[3] = le_load_word32(ks->TK2 + 12);
 #endif
 
-    /* Perform all encryption rounds */
-    for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) {
-        /* Apply the S-box to all bytes in the state */
-        skinny128_sbox(s0);
-        skinny128_sbox(s1);
-        skinny128_sbox(s2);
-        skinny128_sbox(s3);
-
-        /* XOR the round constant and the subkey for this round */
-#if SKINNY_128_SMALL_SCHEDULE
-        rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01;
-        rc &= 0x3F;
-        s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F);
-        s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4);
-#else
-        s0 ^= schedule[0] ^ TK1[0];
-        s1 ^= schedule[1] ^ TK1[1];
-#endif
-        s2 ^= 0x02;
-
-        /* Shift the cells in the rows right, which moves the cell
-         * values up closer to the MSB. That is, we do a left rotate
-         * on the word to rotate the cells in the word right */
-        s1 = leftRotate8(s1);
-        s2 = leftRotate16(s2);
-        s3 = leftRotate24(s3);
-
-        /* Mix the columns */
-        s1 ^= s2;
-        s2 ^= s0;
-        temp = s3 ^ s2;
-        s3 = s2;
-        s2 = s1;
-        s1 = s0;
-        s0 = temp;
-
-        /* Permute TK1 and TK2 for the next round */
-        skinny128_permute_tk(TK1);
+    /* Perform all encryption rounds four at a time */
+    for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
 #if SKINNY_128_SMALL_SCHEDULE
-        skinny128_permute_tk(TK2);
-        skinny128_LFSR2(TK2[0]);
-        skinny128_LFSR2(TK2[1]);
+        skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+        skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+        skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+        skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
#else
-        schedule += 2;
+        skinny_128_256_round(s0, s1, s2, s3, 0, 0);
+        skinny_128_256_round(s3, s0, s1, s2, 1, 1);
+        skinny_128_256_round(s2, s3, s0, s1, 0, 2);
+        skinny_128_256_round(s1, s2, s3, s0, 1, 3);
+        schedule += 8;
 #endif
     }
 
    /* Pack the result into the output buffer */
@@ -624,6 +767,63 @@ void skinny_128_256_encrypt
     le_store_word32(output + 12, s3);
 }
 
+/**
+ * \brief Performs an unrolled inverse round for Skinny-128-256 when
+ * only TK1 is computed on the fly.
+ *
+ * \param s0 First word of the state.
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk312864v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
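+ *
+ * \note Editor's illustration, not part of the original API docs: the
+ * word rotation of the inverse MixColumns is folded into the argument
+ * order, so four calls with rotated state words undo four rounds with
+ * no extra data movement:
+ * \code
+ * skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+ * skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+ * skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+ * skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+ * \endcode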
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
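+ *
+ * \note Editor's sketch of the intended usage, mirroring the encryption
+ * loop later in this file; \a half alternates 0, 1, 0, 1 as the state
+ * words rotate through the argument list:
+ * \code
+ * for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) {
+ *     skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+ *     skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+ *     skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+ *     skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
+ * }
+ * \endcode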
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk396128v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinny128.c b/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
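 * (Editor's note: each iteration of this loop now derives two rounds of
 * key schedule data, schedule[0..1] for the even round and schedule[2..3]
 * for the odd round, which is why the loop steps round by 2 and the
 * schedule pointer by 4.)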
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
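+ *
+ * \note Editor's sketch, drawn from the decryption loop below: the
+ * offsets descend from 3 to 0 because the precomputed schedule is read
+ * backwards, eight words at a time:
+ * \code
+ * for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) {
+ *     skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+ *     skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+ *     skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+ *     skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+ *     schedule -= 8;
+ * }
+ * \endcode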
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
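+ *
+ * \note Editor's note: each invocation draws its subkey from the half of
+ * the TK words selected by \a half, then rewrites the opposite half with
+ * skinny128_permute_tk_half() and the LFSR, so the next call (with \a
+ * half flipped) picks up freshly permuted rows.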
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_aead/skinnyaeadtk39664v1/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinny128.c b/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
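+ *
+ * \note The state words are passed to the four unrolled calls
+ * pre-rotated, so the macro itself can skip the word rotation, and
+ * \a offset counts down because the subkeys are consumed in reverse,
+ * as in the decryption loop below:
+ *
+ * \code
+ * skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3);
+ * skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2);
+ * skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1);
+ * skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0);
+ * schedule -= 8;
+ * \endcode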
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
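The decryption rounds in this hunk step the 6-bit round constant backwards with rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20), the exact inverse of the forward update used by the encryption rounds. A standalone sketch that checks this claim over a full cycle, for illustration only:

    #include <stdint.h>

    /* Returns 1 if the backward rc step undoes the forward rc step. */
    static int rc_steps_are_inverses(void)
    {
        uint8_t rc = 0;
        unsigned i;
        for (i = 0; i < 64; ++i) {
            uint8_t prev = rc;
            /* Forward step, as in the encryption rounds */
            rc = ((rc << 1) ^ ((rc >> 5) & 0x01) ^
                  ((rc >> 4) & 0x01) ^ 0x01) & 0x3F;
            /* Backward step, as in the decryption rounds */
            if ((uint8_t)((rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20)) != prev)
                return 0;
        }
        return 1;
    }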
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
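+ *
+ * \note Only TK1 and TK2 exist for Skinny-128-256, so unlike the
+ * 384-bit variant no TK3 half is permuted here.  The 6-bit round
+ * constant \c rc is stepped inside the macro, so the caller keeps it
+ * live across the four unrolled calls:
+ *
+ * \code
+ * skinny_128_256_round_tk_full(s0, s1, s2, s3, 0);
+ * skinny_128_256_round_tk_full(s3, s0, s1, s2, 1);
+ * skinny_128_256_round_tk_full(s2, s3, s0, s1, 0);
+ * skinny_128_256_round_tk_full(s1, s2, s3, s0, 1);
+ * \endcode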
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
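The "rotate the cells right by rotating the word left" convention that both the forward and the inverse row shifts rely on, made concrete. A standalone sketch; the rotate macros are re-stated here so the example compiles on its own:

    #include <stdint.h>

    #define leftRotate8(x)  ((((x) << 8)  | ((x) >> 24)) & 0xFFFFFFFFU)
    #define leftRotate24(x) ((((x) << 24) | ((x) >> 8))  & 0xFFFFFFFFU)

    int main(void)
    {
        /* Row of cells {0x11, 0x22, 0x33, 0x44}, packed little-endian */
        uint32_t row = 0x44332211U;

        /* Shifting the cells right by one gives {0x44, 0x11, 0x22, 0x33} */
        uint32_t shifted = leftRotate8(row);

        /* The inverse shift rotates the word the other way */
        return (shifted == 0x33221144U && leftRotate24(shifted) == row) ? 0 : 1;
    }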
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_hash/skinnyhashtk2/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. The original version from the specification is * equivalent to: diff --git a/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinny128.c b/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinny128.c index 579ced1..d4adca0 100644 --- a/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinny128.c +++ b/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinny128.c @@ -90,7 +90,7 @@ void skinny_128_384_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. 
*/ @@ -99,11 +99,25 @@ void skinny_128_384_init schedule[0] = TK2[0] ^ TK3[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ TK3[1] ^ (rc >> 4); - /* Permute TK2 and TK3 for the next round */ - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); + /* Permute the bottom half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_permute_tk_half(TK3[2], TK3[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + skinny128_LFSR3(TK3[2]); + skinny128_LFSR3(TK3[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ TK3[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ TK3[3] ^ (rc >> 4); - /* Apply the LFSR's to TK2 and TK3 */ + /* Permute the top half of TK2 and TK3 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); + skinny128_permute_tk_half(TK3[0], TK3[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR3(TK3[0]); @@ -112,6 +126,98 @@ void skinny_128_384_init #endif } +/** + * \brief Performs an unrolled round for Skinny-128-384 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_384_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + } while (0) + +/** + * \brief Performs an unrolled round for Skinny-128-384 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK3[(1 - half) * 2]); \ + skinny128_LFSR3(TK3[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_384_encrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -125,7 +231,6 @@ void skinny_128_384_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -150,53 +255,19 @@ void skinny_128_384_encrypt TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_384_round(s0, s1, s2, s3, 0, 0); + skinny_128_384_round(s3, s0, s1, s2, 1, 1); + skinny_128_384_round(s2, s3, s0, s1, 0, 2); + skinny_128_384_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -207,6 +278,93 @@ void skinny_128_384_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. 
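+ *
+ * \note The inverse round applies the forward steps in reverse
+ * order: TK1 is un-permuted first (the forward round permutes it
+ * last), then the column mix and the row shift are undone, the
+ * subkey and the fixed 0x02 constant are removed, and the inverse
+ * S-box runs last.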
+ */ +#define skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) \ + do { \ + /* Inverse permutation on TK1 for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + s1 ^= schedule[offset * 2] ^ TK1[half * 2]; \ + s2 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1]; \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-384 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK3[(1 - half) * 2], TK3[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK3[(1 - half) * 2]); \ + skinny128_LFSR2(TK3[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ TK3[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ TK3[half * 2 + 1] ^ \ + (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_384_decrypt (const skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -218,9 +376,8 @@ void skinny_128_384_decrypt uint32_t TK3[4]; uint8_t rc = 0x15; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_384_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -251,7 +408,7 @@ void skinny_128_384_decrypt skinny128_fast_forward_tk(TK2); skinny128_fast_forward_tk(TK3); for (round = 0; round < SKINNY_128_384_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2 and TK3. 
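The temp-based word rotation performed by the old rounds has not been lost: the unrolled macros leave the words in place and the caller passes the registers pre-rotated instead, so after four calls the registers are back in their natural order. The call patterns below are taken verbatim from this diff; encryption rotates the arguments one way and decryption the other:

    /* Encryption: the word that used to move into s0 is already there */
    skinny_128_384_round_tk_full(s0, s1, s2, s3, 0);
    skinny_128_384_round_tk_full(s3, s0, s1, s2, 1);
    skinny_128_384_round_tk_full(s2, s3, s0, s1, 0);
    skinny_128_384_round_tk_full(s1, s2, s3, s0, 1);

    /* Decryption: the argument rotation runs in the opposite direction */
    skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1);
    skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0);
    skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1);
    skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0);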
+ /* Also fast-forward the LFSR's on every byte of TK2 and TK3 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -263,50 +420,20 @@ void skinny_128_384_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_inv_permute_tk(TK3); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); - skinny128_LFSR2(TK3[2]); - skinny128_LFSR2(TK3[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); + skinny_128_384_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_384_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_384_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_384_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_384_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_384_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_384_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_384_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -316,6 +443,57 @@ void skinny_128_384_decrypt le_store_word32(output + 12, s3); } +/** + * \def skinny_128_384_round_tk2(s0, s1, s2, s3, half) + * \brief Performs an unrolled round for skinny_128_384_encrypt_tk2(). + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#if SKINNY_128_SMALL_SCHEDULE +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round_tk_full(s0, s1, s2, s3, half) +#else /* !SKINNY_128_SMALL_SCHEDULE */ +#define skinny_128_384_round_tk2(s0, s1, s2, s3, half, offset) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + s0 ^= schedule[offset * 2] ^ TK1[half * 2] ^ TK2[half * 2]; \ + s1 ^= schedule[offset * 2 + 1] ^ TK1[half * 2 + 1] ^ \ + TK2[half * 2 + 1]; \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. 
That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1 and TK2 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) +#endif /* !SKINNY_128_SMALL_SCHEDULE */ + void skinny_128_384_encrypt_tk2 (skinny_128_384_key_schedule_t *ks, unsigned char *output, const unsigned char *input, const unsigned char *tk2) @@ -329,7 +507,6 @@ void skinny_128_384_encrypt_tk2 #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -354,53 +531,14 @@ void skinny_128_384_encrypt_tk2 TK3[3] = le_load_word32(ks->TK3 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* Apply the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0] ^ TK2[0]; - s1 ^= schedule[1] ^ TK1[1] ^ TK2[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK3); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); -#else - schedule += 2; + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk2(s0, s1, s2, s3, 0, 0); + skinny_128_384_round_tk2(s3, s0, s1, s2, 1, 1); + skinny_128_384_round_tk2(s2, s3, s0, s1, 0, 2); + skinny_128_384_round_tk2(s1, s2, s3, s0, 1, 3); +#if !SKINNY_128_SMALL_SCHEDULE + schedule += 8; #endif } @@ -419,7 +557,6 @@ void skinny_128_384_encrypt_tk_full uint32_t TK1[4]; uint32_t TK2[4]; uint32_t TK3[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -443,45 +580,12 @@ void skinny_128_384_encrypt_tk_full TK3[2] = le_load_word32(key + 40); TK3[3] = le_load_word32(key + 44); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_384_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ TK3[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ TK3[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up 
closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1, TK2, and TK3 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_permute_tk(TK3); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); - skinny128_LFSR3(TK3[0]); - skinny128_LFSR3(TK3[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_384_ROUNDS; round += 4) { + skinny_128_384_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_384_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_384_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_384_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ @@ -518,7 +622,7 @@ void skinny_128_256_init * schedule during encryption operations */ schedule = ks->k; rc = 0; - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round, schedule += 2) { + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2, schedule += 4) { /* XOR the round constants with the current schedule words. * The round constants for the 3rd and 4th rows are * fixed and will be applied during encryption. */ @@ -527,16 +631,87 @@ void skinny_128_256_init schedule[0] = TK2[0] ^ (rc & 0x0F); schedule[1] = TK2[1] ^ (rc >> 4); - /* Permute TK2 for the next round */ - skinny128_permute_tk(TK2); + /* Permute the bottom half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[2], TK2[3]); + skinny128_LFSR2(TK2[2]); + skinny128_LFSR2(TK2[3]); + + /* XOR the round constants with the current schedule words. + * The round constants for the 3rd and 4th rows are + * fixed and will be applied during encryption. */ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; + rc &= 0x3F; + schedule[2] = TK2[2] ^ (rc & 0x0F); + schedule[3] = TK2[3] ^ (rc >> 4); - /* Apply the LFSR to TK2 */ + /* Permute the top half of TK2 for the next round */ + skinny128_permute_tk_half(TK2[0], TK2[1]); skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); } #endif } +/** + * \brief Performs an unrolled round for Skinny-128-256 when only TK1 is + * computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled round for Skinny-128-256 when the entire + * tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. 
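+ *
+ * \note Passing \a half as 0 XORs TK words 0 and 1 into the state
+ * while words 2 and 3 are half-permuted ready for the next round;
+ * passing 1 does the opposite.  Alternating 0, 1, 0, 1 across the
+ * four unrolled calls therefore walks the whole tweakey without ever
+ * swapping its halves in memory.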
+ */ +#define skinny_128_256_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Apply the S-box to all bytes in the state */ \ + skinny128_sbox(s0); \ + skinny128_sbox(s1); \ + skinny128_sbox(s2); \ + skinny128_sbox(s3); \ + \ + /* XOR the round constant and the subkey for this round */ \ + rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; \ + rc &= 0x3F; \ + s0 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s1 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s2 ^= 0x02; \ + \ + /* Shift the cells in the rows right, which moves the cell \ + * values up closer to the MSB. That is, we do a left rotate \ + * on the word to rotate the cells in the word right */ \ + s1 = leftRotate8(s1); \ + s2 = leftRotate16(s2); \ + s3 = leftRotate24(s3); \ + \ + /* Mix the columns, but don't rotate the words yet */ \ + s1 ^= s2; \ + s2 ^= s0; \ + s3 ^= s2; \ + \ + /* Permute TK1, TK2, and TK3 in-place for the next round */ \ + skinny128_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR2(TK2[(1 - half) * 2]); \ + skinny128_LFSR2(TK2[(1 - half) * 2 + 1]); \ + } while (0) + void skinny_128_256_encrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -549,7 +724,6 @@ void skinny_128_256_encrypt #else const uint32_t *schedule = ks->k; #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -570,50 +744,19 @@ void skinny_128_256_encrypt TK2[3] = le_load_word32(ks->TK2 + 12); #endif - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ -#if SKINNY_128_SMALL_SCHEDULE - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); -#else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; -#endif - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); #else - schedule += 2; + skinny_128_256_round(s0, s1, s2, s3, 0, 0); + skinny_128_256_round(s3, s0, s1, s2, 1, 1); + skinny_128_256_round(s2, s3, s0, s1, 0, 2); + skinny_128_256_round(s1, s2, s3, s0, 1, 3); + schedule += 8; #endif } @@ -624,6 +767,63 @@ void skinny_128_256_encrypt le_store_word32(output + 12, s3); } +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when + * only TK1 is computed on the fly. + * + * \param s0 First word of the state. 
+ * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + * \param offset Offset between 0 and 3 of the current unrolled round. + */ +#define skinny_128_256_inv_round(s0, s1, s2, s3, half, offset) \ + skinny_128_384_inv_round(s0, s1, s2, s3, half, offset) + +/** + * \brief Performs an unrolled inverse round for Skinny-128-256 when the + * entire tweakey schedule is computed on the fly. + * + * \param s0 First word of the state. + * \param s1 Second word of the state. + * \param s2 Third word of the state. + * \param s3 Fourth word of the state. + * \param half 0 for the bottom half and 1 for the top half of the TK values. + */ +#define skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, half) \ + do { \ + /* Inverse permutation on the tweakey for this round */ \ + skinny128_inv_permute_tk_half \ + (TK1[(1 - half) * 2], TK1[(1 - half) * 2 + 1]); \ + skinny128_inv_permute_tk_half \ + (TK2[(1 - half) * 2], TK2[(1 - half) * 2 + 1]); \ + skinny128_LFSR3(TK2[(1 - half) * 2]); \ + skinny128_LFSR3(TK2[(1 - half) * 2 + 1]); \ + \ + /* Inverse mix of the columns, without word rotation */ \ + s0 ^= s3; \ + s3 ^= s1; \ + s2 ^= s3; \ + \ + /* Inverse shift of the rows */ \ + s2 = leftRotate24(s2); \ + s3 = leftRotate16(s3); \ + s0 = leftRotate8(s0); \ + \ + /* Apply the subkey for this round */ \ + rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); \ + s1 ^= TK1[half * 2] ^ TK2[half * 2] ^ (rc & 0x0F); \ + s2 ^= TK1[half * 2 + 1] ^ TK2[half * 2 + 1] ^ (rc >> 4); \ + s3 ^= 0x02; \ + \ + /* Apply the inverse of the S-box to all bytes in the state */ \ + skinny128_inv_sbox(s0); \ + skinny128_inv_sbox(s1); \ + skinny128_inv_sbox(s2); \ + skinny128_inv_sbox(s3); \ + } while (0) + void skinny_128_256_decrypt (const skinny_128_256_key_schedule_t *ks, unsigned char *output, const unsigned char *input) @@ -634,9 +834,8 @@ void skinny_128_256_decrypt uint32_t TK2[4]; uint8_t rc = 0x09; #else - const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 2]); + const uint32_t *schedule = &(ks->k[SKINNY_128_256_ROUNDS * 2 - 8]); #endif - uint32_t temp; unsigned round; /* Unpack the input block into the state array */ @@ -658,7 +857,7 @@ void skinny_128_256_decrypt TK2[2] = le_load_word32(ks->TK2 + 8); TK2[3] = le_load_word32(ks->TK2 + 12); for (round = 0; round < SKINNY_128_256_ROUNDS; round += 2) { - // Also fast-forward the LFSR's on every byte of TK2. 
+ /* Also fast-forward the LFSR's on every byte of TK2 */ skinny128_LFSR2(TK2[0]); skinny128_LFSR2(TK2[1]); skinny128_LFSR2(TK2[2]); @@ -666,47 +865,20 @@ void skinny_128_256_decrypt } #endif - /* Perform all decryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Inverse permutation on TK1 for this round */ - skinny128_inv_permute_tk(TK1); -#if SKINNY_128_SMALL_SCHEDULE - skinny128_inv_permute_tk(TK2); - skinny128_LFSR3(TK2[2]); - skinny128_LFSR3(TK2[3]); -#endif - - /* Inverse mix of the columns */ - temp = s3; - s3 = s0; - s0 = s1; - s1 = s2; - s3 ^= temp; - s2 = temp ^ s0; - s1 ^= s2; - - /* Inverse shift of the rows */ - s1 = leftRotate24(s1); - s2 = leftRotate16(s2); - s3 = leftRotate8(s3); - - /* Apply the subkey for this round */ + /* Perform all decryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { #if SKINNY_128_SMALL_SCHEDULE - rc = (rc >> 1) ^ (((rc << 5) ^ rc ^ 0x20) & 0x20); - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); + skinny_128_256_inv_round_tk_full(s0, s1, s2, s3, 1); + skinny_128_256_inv_round_tk_full(s1, s2, s3, s0, 0); + skinny_128_256_inv_round_tk_full(s2, s3, s0, s1, 1); + skinny_128_256_inv_round_tk_full(s3, s0, s1, s2, 0); #else - s0 ^= schedule[0] ^ TK1[0]; - s1 ^= schedule[1] ^ TK1[1]; - schedule -= 2; + skinny_128_256_inv_round(s0, s1, s2, s3, 1, 3); + skinny_128_256_inv_round(s1, s2, s3, s0, 0, 2); + skinny_128_256_inv_round(s2, s3, s0, s1, 1, 1); + skinny_128_256_inv_round(s3, s0, s1, s2, 0, 0); + schedule -= 8; #endif - s2 ^= 0x02; - - /* Apply the inverse of the S-box to all bytes in the state */ - skinny128_inv_sbox(s0); - skinny128_inv_sbox(s1); - skinny128_inv_sbox(s2); - skinny128_inv_sbox(s3); } /* Pack the result into the output buffer */ @@ -723,7 +895,6 @@ void skinny_128_256_encrypt_tk_full uint32_t s0, s1, s2, s3; uint32_t TK1[4]; uint32_t TK2[4]; - uint32_t temp; unsigned round; uint8_t rc = 0; @@ -743,42 +914,12 @@ void skinny_128_256_encrypt_tk_full TK2[2] = le_load_word32(key + 24); TK2[3] = le_load_word32(key + 28); - /* Perform all encryption rounds */ - for (round = 0; round < SKINNY_128_256_ROUNDS; ++round) { - /* Apply the S-box to all bytes in the state */ - skinny128_sbox(s0); - skinny128_sbox(s1); - skinny128_sbox(s2); - skinny128_sbox(s3); - - /* XOR the round constant and the subkey for this round */ - rc = (rc << 1) ^ ((rc >> 5) & 0x01) ^ ((rc >> 4) & 0x01) ^ 0x01; - rc &= 0x3F; - s0 ^= TK1[0] ^ TK2[0] ^ (rc & 0x0F); - s1 ^= TK1[1] ^ TK2[1] ^ (rc >> 4); - s2 ^= 0x02; - - /* Shift the cells in the rows right, which moves the cell - * values up closer to the MSB. 
That is, we do a left rotate - * on the word to rotate the cells in the word right */ - s1 = leftRotate8(s1); - s2 = leftRotate16(s2); - s3 = leftRotate24(s3); - - /* Mix the columns */ - s1 ^= s2; - s2 ^= s0; - temp = s3 ^ s2; - s3 = s2; - s2 = s1; - s1 = s0; - s0 = temp; - - /* Permute TK1 and TK2 for the next round */ - skinny128_permute_tk(TK1); - skinny128_permute_tk(TK2); - skinny128_LFSR2(TK2[0]); - skinny128_LFSR2(TK2[1]); + /* Perform all encryption rounds four at a time */ + for (round = 0; round < SKINNY_128_256_ROUNDS; round += 4) { + skinny_128_256_round_tk_full(s0, s1, s2, s3, 0); + skinny_128_256_round_tk_full(s3, s0, s1, s2, 1); + skinny_128_256_round_tk_full(s2, s3, s0, s1, 0); + skinny_128_256_round_tk_full(s1, s2, s3, s0, 1); } /* Pack the result into the output buffer */ diff --git a/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinnyutil.h b/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinnyutil.h index 83136cb..8a5296d 100644 --- a/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinnyutil.h +++ b/skinny/Implementations/crypto_hash/skinnyhashtk3/rhys/internal-skinnyutil.h @@ -74,6 +74,21 @@ extern "C" { ( row3 & 0x00FF0000U); \ } while (0) +#define skinny128_permute_tk_half(tk2, tk3) \ + do { \ + /* Permute the bottom half of the tweakey state in place, no swap */ \ + uint32_t row2 = tk2; \ + uint32_t row3 = tk3; \ + row3 = (row3 << 16) | (row3 >> 16); \ + tk2 = ((row2 >> 8) & 0x000000FFU) | \ + ((row2 << 16) & 0x00FF0000U) | \ + ( row3 & 0xFF00FF00U); \ + tk3 = ((row2 >> 16) & 0x000000FFU) | \ + (row2 & 0xFF000000U) | \ + ((row3 << 8) & 0x0000FF00U) | \ + ( row3 & 0x00FF0000U); \ + } while (0) + #define skinny128_inv_permute_tk(tk) \ do { \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ @@ -91,6 +106,21 @@ extern "C" { ((row1 << 8) & 0x00FF0000U); \ } while (0) +#define skinny128_inv_permute_tk_half(tk0, tk1) \ + do { \ + /* Permute the top half of the tweakey state in place, no swap */ \ + uint32_t row0 = tk0; \ + uint32_t row1 = tk1; \ + tk0 = ((row0 >> 16) & 0x000000FFU) | \ + ((row0 << 8) & 0x0000FF00U) | \ + ((row1 << 16) & 0x00FF0000U) | \ + ( row1 & 0xFF000000U); \ + tk1 = ((row0 >> 16) & 0x0000FF00U) | \ + ((row0 << 16) & 0xFF000000U) | \ + ((row1 >> 16) & 0x000000FFU) | \ + ((row1 << 8) & 0x00FF0000U); \ + } while (0) + /* * Apply the SKINNY sbox. 
The original version from the specification is * equivalent to: diff --git a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook-avr.S b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook-avr.S new file mode 100644 index 0000000..05ca51e --- /dev/null +++ b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook-avr.S @@ -0,0 +1,4898 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global clyde128_encrypt + .type clyde128_encrypt, @function +clyde128_encrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + eor r10,r18 + eor r11,r19 + eor r12,r14 + eor r13,r15 + rcall 151f + ldi r27,1 + eor r20,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 635f + rjmp 725f +151: + movw r18,r20 + movw r14,r22 + and r18,r2 + and r19,r3 + and r14,r4 + and r15,r5 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + and r24,r20 + and r25,r21 + and r16,r22 + and r17,r23 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + and r10,r18 + and r11,r19 + and r12,r14 + and r13,r15 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 + movw r20,r24 + movw r22,r16 + movw r2,r18 + movw r4,r14 + mov r18,r21 + mov r19,r22 + mov r14,r23 + mov r15,r20 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + 
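+/* Annotation (assumed mapping): local label 151 implements one Clyde-128 round, the S-box network above followed by the L-box rotates; 635 adds the key/tweak after every second round; 725 stores the state and returns. */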
ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r3 + mov r25,r4 + mov r16,r5 + mov r17,r2 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r22 + mov r22,r20 + mov r20,r0 + mov r0,r23 + mov r23,r21 + mov r21,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + mov r0,r4 + mov r4,r2 + mov r2,r0 + mov r0,r5 + mov r5,r3 + mov r3,r0 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r2,r24 + eor r3,r25 + eor r4,r16 + eor r5,r17 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r20,r14 + eor r21,r15 + eor r22,r18 + eor r23,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r2,r16 + eor r3,r17 + eor r4,r24 + eor r5,r25 + mov r18,r7 + mov r19,r8 + mov r14,r9 + mov r15,r6 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r11 + mov r25,r12 + mov r16,r13 + mov r17,r10 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror 
r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r8 + mov r8,r6 + mov r6,r0 + mov r0,r9 + mov r9,r7 + mov r7,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r6,r14 + eor r7,r15 + eor r8,r18 + eor r9,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r10,r16 + eor r11,r17 + eor r12,r24 + eor r13,r25 + ret +635: + ldd r18,Y+9 + ldd r19,Y+10 + ldd r14,Y+11 + ldd r15,Y+12 + ldd r24,Y+1 + ldd r25,Y+2 + ldd r16,Y+3 + ldd r17,Y+4 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+9,r24 + std Y+10,r25 + std Y+11,r16 + std Y+12,r17 + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r14,Y+15 + ldd r15,Y+16 + ldd r24,Y+5 + ldd r25,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+13,r24 + std Y+14,r25 + std Y+15,r16 + std Y+16,r17 + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ret +725: + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_encrypt, .-clyde128_encrypt + + .text +.global clyde128_decrypt + .type clyde128_decrypt, @function +clyde128_decrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 
0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + rcall 103f + rjmp 623f +103: + mov r18,r23 + mov r19,r20 + mov r14,r21 + mov r15,r22 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r5 + mov r25,r2 + mov r16,r3 + mov r17,r4 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r20,r15 + eor r21,r18 + eor r22,r19 + eor r23,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r2,r17 + eor r3,r24 + eor r4,r25 + eor r5,r16 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r18,r22 + eor r19,r23 + eor r14,r20 + eor r15,r21 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r24,r4 + eor r25,r5 + eor r16,r2 + 
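+/* Annotation (assumed mapping): local label 103 is the inverse Clyde-128 round, the inverse L-box followed by the inverse S-box network at its end; 533 re-adds the key/tweak between steps; 623 is the shared epilogue. */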
eor r17,r3 + movw r20,r14 + movw r22,r18 + movw r2,r16 + movw r4,r24 + mov r18,r9 + mov r19,r6 + mov r14,r7 + mov r15,r8 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r13 + mov r25,r10 + mov r16,r11 + mov r17,r12 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r6,r15 + eor r7,r18 + eor r8,r19 + eor r9,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r10,r17 + eor r11,r24 + eor r12,r25 + eor r13,r16 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r18,r8 + eor r19,r9 + eor r14,r6 + eor r15,r7 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r24,r12 + eor r25,r13 + eor r16,r10 + eor r17,r11 + movw r6,r14 + movw r8,r18 + movw r10,r16 + movw r12,r24 + movw r24,r20 + movw r16,r22 + and r24,r2 + and r25,r3 + and r16,r4 + and r17,r5 + eor r24,r6 + eor r25,r7 + eor r16,r8 + eor r17,r9 + movw r18,r2 + movw r14,r4 + and r18,r24 + and r19,r25 + and r14,r16 + and r15,r17 + eor r18,r10 + eor r19,r11 + eor r14,r12 + eor r15,r13 + movw r10,r24 + movw r12,r16 + and r24,r18 + and r25,r19 + and r16,r14 + and r17,r15 + eor r24,r20 + eor r25,r21 + eor r16,r22 + eor r17,r23 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r20,r18 + movw r22,r14 + movw r2,r24 + movw r4,r16 + ret +533: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r14,Y+3 + ldd r15,Y+4 + ldd r24,Y+9 + ldd r25,Y+10 + ldd r16,Y+11 + ldd r17,Y+12 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + 
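+/* Annotation (assumed mapping): subroutine 533 XORs the key (via Z) into the state and folds in the tweak words cached on the stack (via Y), stepping the stored tweak schedule for the next call. */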
eor r9,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+1,r24 + std Y+2,r25 + std Y+3,r16 + std Y+4,r17 + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r14,Y+7 + ldd r15,Y+8 + ldd r24,Y+13 + ldd r25,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+5,r24 + std Y+6,r25 + std Y+7,r16 + std Y+8,r17 + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + ret +623: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r0,Y+1 + eor r20,r0 + ldd r0,Y+2 + eor r21,r0 + ldd r0,Y+3 + eor r22,r0 + ldd r0,Y+4 + eor r23,r0 + ldd r0,Y+5 + eor r2,r0 + ldd r0,Y+6 + eor r3,r0 + ldd r0,Y+7 + eor r4,r0 + ldd r0,Y+8 + eor r5,r0 + ldd r0,Y+9 + eor r6,r0 + ldd r0,Y+10 + eor r7,r0 + ldd r0,Y+11 + eor r8,r0 + ldd r0,Y+12 + eor r9,r0 + ldd r0,Y+13 + eor r10,r0 + ldd r0,Y+14 + eor r11,r0 + ldd r0,Y+15 + eor r12,r0 + ldd r0,Y+16 + eor r13,r0 + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_decrypt, .-clyde128_decrypt + + .text +.global shadow512 + .type shadow512, @function +shadow512: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi 
r25,4 + eor r18,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+60 + ldi r16,8 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd 
r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd 
r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+48 + ldi r16,8 + eor r18,r16 + std Z+48,r18 + ldd r18,Z+52 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r28,r25 + 
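+/* Annotation (assumed mapping): in shadow512, subroutine 1083 is the per-bundle S-box layer, 1127 the L-box layer, and 1553 the cross-bundle diffusion layer; the ldi/eor pairs between calls inject the round constants into each 128-bit bundle. */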
rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + rjmp 1795f +1083: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and 
r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +1127: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + 
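+/* Annotation: the preceding lsr/ror chain shifts the 32-bit word right four times, collecting the ejected bits in r0; the or below folds them back into the top byte, completing a 32-bit rotate right by 4. */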
or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1553: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + ldd r4,Z+48 + ldd r5,Z+49 + ldd r6,Z+50 + ldd r7,Z+51 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+32,r4 + std Z+33,r5 + std Z+34,r6 + std Z+35,r7 + std Z+48,r28 + std Z+49,r29 + std Z+50,r2 + std Z+51,r3 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + ldd r4,Z+52 + ldd r5,Z+53 + ldd r6,Z+54 + ldd r7,Z+55 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + std Z+36,r4 + std Z+37,r5 + std Z+38,r6 + std Z+39,r7 + std Z+52,r28 + std Z+53,r29 + std Z+54,r2 + std Z+55,r3 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd 
r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+56 + ldd r5,Z+57 + ldd r6,Z+58 + ldd r7,Z+59 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + std Z+40,r4 + std Z+41,r5 + std Z+42,r6 + std Z+43,r7 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + std Z+60,r28 + std Z+61,r29 + std Z+62,r2 + std Z+63,r3 + ret +1795: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow512, .-shadow512 + + .text +.global shadow384 + .type shadow384, @function +shadow384: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std 
Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std 
Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + 
std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + rjmp 1430f +814: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 
+ movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +858: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 
+ ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1284: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+32,r22 + std Z+33,r23 + std Z+34,r26 + std Z+35,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+40,r22 + std Z+41,r23 + std Z+42,r26 + std Z+43,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+44,r22 + std Z+45,r23 + std Z+46,r26 + std Z+47,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ret +1430: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 
+ pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow384, .-shadow384 + +#endif diff --git a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.c b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.c index 0e19216..068938b 100644 --- a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.c +++ b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.c @@ -22,6 +22,8 @@ #include "internal-spook.h" +#if !defined(__AVR__) + /** * \brief Number of steps in the Clyde-128 block cipher. * @@ -43,9 +45,9 @@ static uint8_t const rc[CLYDE128_STEPS][8] = { }; void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]) + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -154,9 +156,9 @@ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], } void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]) + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -555,3 +557,5 @@ void shadow384(shadow384_state_t *state) le_store_word32(state->B + 44, s23); #endif } + +#endif /* !__AVR__ */ diff --git a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.h b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.h index b08ce80..77c8b86 100644 --- a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.h +++ b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/internal-spook.h @@ -93,31 +93,31 @@ typedef union * \brief Encrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to encrypt with. - * \param tweak Points to the tweak to encrypt with. * \param output Output buffer for the ciphertext. * \param input Input buffer for the plaintext. + * \param tweak Points to the tweak to encrypt with. * * \sa clyde128_decrypt() */ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]); + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Decrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to decrypt with. - * \param tweak Points to the tweak to decrypt with. * \param output Output buffer for the plaintext. * \param input Input buffer for the ciphertext. + * \param tweak Points to the tweak to decrypt with. * * \sa clyde128_encrypt() */ void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]); + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Performs the Shadow-512 permutation on a state. 
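Note: the internal-spook.c and internal-spook.h hunks above change the clyde128_encrypt() and clyde128_decrypt() prototypes so that the tweak becomes the last parameter, giving the more conventional (key, output, input, tweak) ordering, and they wrap the portable C implementation in #if !defined(__AVR__) so that the AVR assembly added below takes over on AVR targets. The spook.c hunks that follow update every call site for the new ordering. A minimal sketch of an updated caller is shown here; the function and buffer names are illustrative only, and it assumes the rhys internal-spook.h header is on the include path:

    #include <stdint.h>
    #include "internal-spook.h"

    /* Illustrative sketch only: encrypt one block under an all-zero
     * key and tweak.  Before this patch the call would have been
     *     clyde128_encrypt(key, tweak, output, input);
     * after it the tweak moves to the end of the argument list.    */
    static void clyde128_call_example(void)
    {
        unsigned char key[CLYDE128_KEY_SIZE] = {0};
        uint32_t tweak[CLYDE128_TWEAK_SIZE / 4] = {0};
        uint32_t input[CLYDE128_BLOCK_SIZE / 4] = {0};
        uint32_t output[CLYDE128_BLOCK_SIZE / 4];

        clyde128_encrypt(key, output, input, tweak);
    }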
diff --git a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/spook.c b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/spook.c index d075b33..2dbab94 100644 --- a/spook/Implementations/crypto_aead/spook128mu384v1/rhys/spook.c +++ b/spook/Implementations/crypto_aead/spook128mu384v1/rhys/spook.c @@ -86,7 +86,7 @@ static void spook_128_512_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 12, state->W + 4); + clyde128_encrypt(k, state->W + 12, state->W + 4, state->W); shadow512(state); } @@ -111,7 +111,7 @@ static void spook_128_384_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 8, state->W + 4); + clyde128_encrypt(k, state->W + 8, state->W + 4, state->W); shadow384(state); } @@ -310,7 +310,7 @@ int spook_128_512_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -345,7 +345,7 @@ int spook_128_512_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -377,7 +377,7 @@ int spook_128_384_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -412,7 +412,7 @@ int spook_128_384_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -444,7 +444,7 @@ int spook_128_512_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -479,7 +479,7 @@ int spook_128_512_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -511,7 +511,7 @@ int spook_128_384_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -546,7 +546,7 @@ int spook_128_384_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } diff --git 
a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook-avr.S b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook-avr.S new file mode 100644 index 0000000..05ca51e --- /dev/null +++ b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook-avr.S @@ -0,0 +1,4898 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global clyde128_encrypt + .type clyde128_encrypt, @function +clyde128_encrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + eor r10,r18 + eor r11,r19 + eor r12,r14 + eor r13,r15 + rcall 151f + ldi r27,1 + eor r20,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 635f + rjmp 725f +151: + movw r18,r20 + movw r14,r22 + and r18,r2 + and r19,r3 + and r14,r4 + and r15,r5 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + and r24,r20 + and r25,r21 + and r16,r22 + and r17,r23 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + and r10,r18 + and r11,r19 + and r12,r14 + and r13,r15 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 + movw r20,r24 + movw r22,r16 + movw r2,r18 + movw r4,r14 + mov r18,r21 + mov r19,r22 + mov r14,r23 + mov r15,r20 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr
r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r3 + mov r25,r4 + mov r16,r5 + mov r17,r2 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r22 + mov r22,r20 + mov r20,r0 + mov r0,r23 + mov r23,r21 + mov r21,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + mov r0,r4 + mov r4,r2 + mov r2,r0 + mov r0,r5 + mov r5,r3 + mov r3,r0 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r2,r24 + eor r3,r25 + eor r4,r16 + eor r5,r17 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r20,r14 + eor r21,r15 + eor r22,r18 + eor r23,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r2,r16 + eor r3,r17 + eor r4,r24 + eor r5,r25 + mov r18,r7 + mov r19,r8 + mov r14,r9 + mov r15,r6 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r11 + mov r25,r12 + mov r16,r13 + mov r17,r10 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor 
r17,r0 + mov r0,r8 + mov r8,r6 + mov r6,r0 + mov r0,r9 + mov r9,r7 + mov r7,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r6,r14 + eor r7,r15 + eor r8,r18 + eor r9,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r10,r16 + eor r11,r17 + eor r12,r24 + eor r13,r25 + ret +635: + ldd r18,Y+9 + ldd r19,Y+10 + ldd r14,Y+11 + ldd r15,Y+12 + ldd r24,Y+1 + ldd r25,Y+2 + ldd r16,Y+3 + ldd r17,Y+4 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+9,r24 + std Y+10,r25 + std Y+11,r16 + std Y+12,r17 + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r14,Y+15 + ldd r15,Y+16 + ldd r24,Y+5 + ldd r25,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+13,r24 + std Y+14,r25 + std Y+15,r16 + std Y+16,r17 + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ret +725: + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_encrypt, .-clyde128_encrypt + + .text +.global clyde128_decrypt + .type clyde128_decrypt, @function +clyde128_decrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld 
r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + rcall 103f + rjmp 623f +103: + mov r18,r23 + mov r19,r20 + mov r14,r21 + mov r15,r22 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r5 + mov r25,r2 + mov r16,r3 + mov r17,r4 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r20,r15 + eor r21,r18 + eor r22,r19 + eor r23,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r2,r17 + eor r3,r24 + eor r4,r25 + eor r5,r16 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r18,r22 + eor r19,r23 + eor r14,r20 + eor r15,r21 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r24,r4 + eor r25,r5 + eor r16,r2 + eor r17,r3 + movw r20,r14 + movw r22,r18 + movw r2,r16 + movw r4,r24 + mov 
r18,r9 + mov r19,r6 + mov r14,r7 + mov r15,r8 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r13 + mov r25,r10 + mov r16,r11 + mov r17,r12 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r6,r15 + eor r7,r18 + eor r8,r19 + eor r9,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r10,r17 + eor r11,r24 + eor r12,r25 + eor r13,r16 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r18,r8 + eor r19,r9 + eor r14,r6 + eor r15,r7 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r24,r12 + eor r25,r13 + eor r16,r10 + eor r17,r11 + movw r6,r14 + movw r8,r18 + movw r10,r16 + movw r12,r24 + movw r24,r20 + movw r16,r22 + and r24,r2 + and r25,r3 + and r16,r4 + and r17,r5 + eor r24,r6 + eor r25,r7 + eor r16,r8 + eor r17,r9 + movw r18,r2 + movw r14,r4 + and r18,r24 + and r19,r25 + and r14,r16 + and r15,r17 + eor r18,r10 + eor r19,r11 + eor r14,r12 + eor r15,r13 + movw r10,r24 + movw r12,r16 + and r24,r18 + and r25,r19 + and r16,r14 + and r17,r15 + eor r24,r20 + eor r25,r21 + eor r16,r22 + eor r17,r23 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r20,r18 + movw r22,r14 + movw r2,r24 + movw r4,r16 + ret +533: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r14,Y+3 + ldd r15,Y+4 + ldd r24,Y+9 + ldd r25,Y+10 + ldd r16,Y+11 + ldd r17,Y+12 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std 
Y+1,r24 + std Y+2,r25 + std Y+3,r16 + std Y+4,r17 + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r14,Y+7 + ldd r15,Y+8 + ldd r24,Y+13 + ldd r25,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+5,r24 + std Y+6,r25 + std Y+7,r16 + std Y+8,r17 + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + ret +623: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r0,Y+1 + eor r20,r0 + ldd r0,Y+2 + eor r21,r0 + ldd r0,Y+3 + eor r22,r0 + ldd r0,Y+4 + eor r23,r0 + ldd r0,Y+5 + eor r2,r0 + ldd r0,Y+6 + eor r3,r0 + ldd r0,Y+7 + eor r4,r0 + ldd r0,Y+8 + eor r5,r0 + ldd r0,Y+9 + eor r6,r0 + ldd r0,Y+10 + eor r7,r0 + ldd r0,Y+11 + eor r8,r0 + ldd r0,Y+12 + eor r9,r0 + ldd r0,Y+13 + eor r10,r0 + ldd r0,Y+14 + eor r11,r0 + ldd r0,Y+15 + eor r12,r0 + ldd r0,Y+16 + eor r13,r0 + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_decrypt, .-clyde128_decrypt + + .text +.global shadow512 + .type shadow512, @function +shadow512: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std 
Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+60 + ldi r16,8 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 
1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+48 + ldi r16,8 + eor r18,r16 + std Z+48,r18 + ldd r18,Z+52 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + 
std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + rjmp 1795f +1083: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 
+ movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +1127: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 
+ movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1553: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + ldd r4,Z+48 + ldd r5,Z+49 + ldd r6,Z+50 + ldd r7,Z+51 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+32,r4 + std Z+33,r5 + std Z+34,r6 + std Z+35,r7 + std Z+48,r28 + std Z+49,r29 + std Z+50,r2 + std Z+51,r3 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + ldd r4,Z+52 + ldd r5,Z+53 + ldd r6,Z+54 + ldd r7,Z+55 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + std Z+36,r4 + std Z+37,r5 + std Z+38,r6 + std Z+39,r7 + std Z+52,r28 + std Z+53,r29 + std Z+54,r2 + std Z+55,r3 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+56 + ldd r5,Z+57 + ldd r6,Z+58 + ldd r7,Z+59 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + std Z+40,r4 + std Z+41,r5 + std Z+42,r6 + std Z+43,r7 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + std Z+60,r28 + std Z+61,r29 + std Z+62,r2 + std Z+63,r3 + ret +1795: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow512, .-shadow512 + + .text +.global shadow384 + .type shadow384, @function +shadow384: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd 
r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + rjmp 1430f +814: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +858: + mov r8,r19 + mov 
r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor 
r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1284: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+32,r22 + std Z+33,r23 + std Z+34,r26 + std Z+35,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+40,r22 + std Z+41,r23 + std Z+42,r26 + std Z+43,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+44,r22 + std Z+45,r23 + std Z+46,r26 + std Z+47,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ret +1430: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size 
shadow384, .-shadow384
+
+#endif
diff --git a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.c b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.c
index 0e19216..068938b 100644
--- a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.c
+++ b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.c
@@ -22,6 +22,8 @@
 
 #include "internal-spook.h"
 
+#if !defined(__AVR__)
+
 /**
  * \brief Number of steps in the Clyde-128 block cipher.
  *
@@ -43,9 +45,9 @@ static uint8_t const rc[CLYDE128_STEPS][8] = {
 };
 
 void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE],
-                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4],
                       uint32_t output[CLYDE128_BLOCK_SIZE / 4],
-                      const uint32_t input[CLYDE128_BLOCK_SIZE / 4])
+                      const uint32_t input[CLYDE128_BLOCK_SIZE / 4],
+                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4])
 {
     uint32_t k0, k1, k2, k3;
     uint32_t t0, t1, t2, t3;
@@ -154,9 +156,9 @@ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE],
 }
 
 void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE],
-                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4],
                       uint32_t output[CLYDE128_BLOCK_SIZE / 4],
-                      const unsigned char input[CLYDE128_BLOCK_SIZE])
+                      const unsigned char input[CLYDE128_BLOCK_SIZE],
+                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4])
 {
     uint32_t k0, k1, k2, k3;
     uint32_t t0, t1, t2, t3;
@@ -555,3 +557,5 @@ void shadow384(shadow384_state_t *state)
     le_store_word32(state->B + 44, s23);
 #endif
 }
+
+#endif /* !__AVR__ */
diff --git a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.h b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.h
index b08ce80..77c8b86 100644
--- a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.h
+++ b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/internal-spook.h
@@ -93,31 +93,31 @@ typedef union
  * \brief Encrypts a block with the Clyde-128 block cipher.
  *
  * \param key Points to the key to encrypt with.
- * \param tweak Points to the tweak to encrypt with.
  * \param output Output buffer for the ciphertext.
  * \param input Input buffer for the plaintext.
+ * \param tweak Points to the tweak to encrypt with.
  *
  * \sa clyde128_decrypt()
  */
 void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE],
-                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4],
                       uint32_t output[CLYDE128_BLOCK_SIZE / 4],
-                      const uint32_t input[CLYDE128_BLOCK_SIZE / 4]);
+                      const uint32_t input[CLYDE128_BLOCK_SIZE / 4],
+                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]);
 
 /**
  * \brief Decrypts a block with the Clyde-128 block cipher.
  *
  * \param key Points to the key to decrypt with.
- * \param tweak Points to the tweak to decrypt with.
  * \param output Output buffer for the plaintext.
  * \param input Input buffer for the ciphertext.
+ * \param tweak Points to the tweak to decrypt with.
  *
  * \sa clyde128_encrypt()
  */
 void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE],
-                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4],
                       uint32_t output[CLYDE128_BLOCK_SIZE / 4],
-                      const unsigned char input[CLYDE128_BLOCK_SIZE]);
+                      const unsigned char input[CLYDE128_BLOCK_SIZE],
+                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]);
 
 /**
  * \brief Performs the Shadow-512 permutation on a state.
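Both prototypes above move the tweak from the second argument to the last,
and the spook.c hunks that follow update every call site to match. As a
caller-side illustration only (the helper name is hypothetical, not part of
this patch), the new order is key, output, input, tweak:

    #include <stdint.h>
    #include "internal-spook.h"

    /* Sketch: encrypt one block in place with the reordered API; spook.c
     * itself passes overlapping output/input pointers the same way. */
    static void clyde128_inplace_sketch
        (const unsigned char key[CLYDE128_KEY_SIZE],
         uint32_t block[CLYDE128_BLOCK_SIZE / 4],
         const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4])
    {
        /* Before this patch: clyde128_encrypt(key, tweak, block, block); */
        clyde128_encrypt(key, block, block, tweak);
    }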
diff --git a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/spook.c b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/spook.c
index d075b33..2dbab94 100644
--- a/spook/Implementations/crypto_aead/spook128mu512v1/rhys/spook.c
+++ b/spook/Implementations/crypto_aead/spook128mu512v1/rhys/spook.c
@@ -86,7 +86,7 @@ static void spook_128_512_init
         state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40;
     }
     memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE);
-    clyde128_encrypt(k, state->W, state->W + 12, state->W + 4);
+    clyde128_encrypt(k, state->W + 12, state->W + 4, state->W);
     shadow512(state);
 }
 
@@ -111,7 +111,7 @@ static void spook_128_384_init
         state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40;
     }
     memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE);
-    clyde128_encrypt(k, state->W, state->W + 8, state->W + 4);
+    clyde128_encrypt(k, state->W + 8, state->W + 4, state->W);
     shadow384(state);
 }
 
@@ -310,7 +310,7 @@ int spook_128_512_su_aead_encrypt
 
     /* Compute the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_encrypt(k, state.W + 4, state.W, state.W);
+    clyde128_encrypt(k, state.W, state.W, state.W + 4);
     memcpy(c + mlen, state.B, SPOOK_TAG_SIZE);
     return 0;
 }
@@ -345,7 +345,7 @@ int spook_128_512_su_aead_decrypt
 
     /* Check the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen);
+    clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4);
     return aead_check_tag
         (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE);
 }
@@ -377,7 +377,7 @@ int spook_128_384_su_aead_encrypt
 
     /* Compute the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_encrypt(k, state.W + 4, state.W, state.W);
+    clyde128_encrypt(k, state.W, state.W, state.W + 4);
     memcpy(c + mlen, state.B, SPOOK_TAG_SIZE);
     return 0;
 }
@@ -412,7 +412,7 @@ int spook_128_384_su_aead_decrypt
 
     /* Check the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen);
+    clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4);
     return aead_check_tag
         (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE);
 }
@@ -444,7 +444,7 @@ int spook_128_512_mu_aead_encrypt
 
     /* Compute the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_encrypt(k, state.W + 4, state.W, state.W);
+    clyde128_encrypt(k, state.W, state.W, state.W + 4);
     memcpy(c + mlen, state.B, SPOOK_TAG_SIZE);
     return 0;
 }
@@ -479,7 +479,7 @@ int spook_128_512_mu_aead_decrypt
 
     /* Check the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen);
+    clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4);
     return aead_check_tag
         (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE);
 }
@@ -511,7 +511,7 @@ int spook_128_384_mu_aead_encrypt
 
     /* Compute the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_encrypt(k, state.W + 4, state.W, state.W);
+    clyde128_encrypt(k, state.W, state.W, state.W + 4);
     memcpy(c + mlen, state.B, SPOOK_TAG_SIZE);
     return 0;
 }
@@ -546,7 +546,7 @@ int spook_128_384_mu_aead_decrypt
 
     /* Check the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen);
+    clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4);
     return aead_check_tag
         (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE);
 }
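The next file in the patch is the new internal-spook-avr.S (4898 added
lines of generated assembly). For orientation, here is a plain C sketch
(names are mine, not part of the patch) of the two subroutines the
generated code unrolls, as read back from the assembly itself: the
bitsliced Clyde/Shadow S-box and the L-box applied to each pair of
state words.

    #include <stdint.h>

    #define rightRotate(x, n) (((x) >> (n)) | ((x) << (32 - (n))))

    /* Bitsliced Clyde-128/Shadow S-box on four 32-bit slices. */
    static void clyde_sbox(uint32_t s[4])
    {
        uint32_t c = (s[0] & s[1]) ^ s[2];
        uint32_t d = (s[3] & s[0]) ^ s[1];
        s[2] = (c & d) ^ s[3];
        s[3] = (c & s[3]) ^ s[0];
        s[0] = d;
        s[1] = c;
    }

    /* Clyde-128/Shadow L-box on a pair of 32-bit words. */
    static void clyde_lbox(uint32_t *x, uint32_t *y)
    {
        uint32_t a, b, c, d;
        a = *x ^ rightRotate(*x, 12);
        b = *y ^ rightRotate(*y, 12);
        a ^= rightRotate(a, 3);
        b ^= rightRotate(b, 3);
        a ^= rightRotate(*x, 17);
        b ^= rightRotate(*y, 17);
        c = a ^ rightRotate(a, 31);
        d = b ^ rightRotate(b, 31);
        a ^= rightRotate(d, 26);
        b ^= rightRotate(c, 25);
        *x = a ^ rightRotate(c, 15);
        *y = b ^ rightRotate(d, 15);
    }

AVR has no multi-bit rotate instruction, so the generated code reaches each
of these rotations as a byte permutation plus at most four single-bit
shifts; that is what the long lsr/ror runs in the assembly implement.

diff --git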
a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook-avr.S b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook-avr.S new file mode 100644 index 0000000..05ca51e --- /dev/null +++ b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook-avr.S @@ -0,0 +1,4898 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global clyde128_encrypt + .type clyde128_encrypt, @function +clyde128_encrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + eor r10,r18 + eor r11,r19 + eor r12,r14 + eor r13,r15 + rcall 151f + ldi r27,1 + eor r20,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 635f + rjmp 725f +151: + movw r18,r20 + movw r14,r22 + and r18,r2 + and r19,r3 + and r14,r4 + and r15,r5 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + and r24,r20 + and r25,r21 + and r16,r22 + and r17,r23 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + and r10,r18 + and r11,r19 + and r12,r14 + and r13,r15 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 + movw r20,r24 + movw r22,r16 + movw r2,r18 + movw r4,r14 + mov r18,r21 + mov r19,r22 + mov r14,r23 + mov r15,r20 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr 
r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r3 + mov r25,r4 + mov r16,r5 + mov r17,r2 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r22 + mov r22,r20 + mov r20,r0 + mov r0,r23 + mov r23,r21 + mov r21,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + mov r0,r4 + mov r4,r2 + mov r2,r0 + mov r0,r5 + mov r5,r3 + mov r3,r0 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r2,r24 + eor r3,r25 + eor r4,r16 + eor r5,r17 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r20,r14 + eor r21,r15 + eor r22,r18 + eor r23,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r2,r16 + eor r3,r17 + eor r4,r24 + eor r5,r25 + mov r18,r7 + mov r19,r8 + mov r14,r9 + mov r15,r6 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r11 + mov r25,r12 + mov r16,r13 + mov r17,r10 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor 
r17,r0 + mov r0,r8 + mov r8,r6 + mov r6,r0 + mov r0,r9 + mov r9,r7 + mov r7,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r6,r14 + eor r7,r15 + eor r8,r18 + eor r9,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r10,r16 + eor r11,r17 + eor r12,r24 + eor r13,r25 + ret +635: + ldd r18,Y+9 + ldd r19,Y+10 + ldd r14,Y+11 + ldd r15,Y+12 + ldd r24,Y+1 + ldd r25,Y+2 + ldd r16,Y+3 + ldd r17,Y+4 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+9,r24 + std Y+10,r25 + std Y+11,r16 + std Y+12,r17 + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r14,Y+15 + ldd r15,Y+16 + ldd r24,Y+5 + ldd r25,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+13,r24 + std Y+14,r25 + std Y+15,r16 + std Y+16,r17 + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ret +725: + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_encrypt, .-clyde128_encrypt + + .text +.global clyde128_decrypt + .type clyde128_decrypt, @function +clyde128_decrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld 
r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + rcall 103f + rjmp 623f +103: + mov r18,r23 + mov r19,r20 + mov r14,r21 + mov r15,r22 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r5 + mov r25,r2 + mov r16,r3 + mov r17,r4 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r20,r15 + eor r21,r18 + eor r22,r19 + eor r23,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r2,r17 + eor r3,r24 + eor r4,r25 + eor r5,r16 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r18,r22 + eor r19,r23 + eor r14,r20 + eor r15,r21 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r24,r4 + eor r25,r5 + eor r16,r2 + eor r17,r3 + movw r20,r14 + movw r22,r18 + movw r2,r16 + movw r4,r24 + mov 
r18,r9 + mov r19,r6 + mov r14,r7 + mov r15,r8 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r13 + mov r25,r10 + mov r16,r11 + mov r17,r12 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r6,r15 + eor r7,r18 + eor r8,r19 + eor r9,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r10,r17 + eor r11,r24 + eor r12,r25 + eor r13,r16 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r18,r8 + eor r19,r9 + eor r14,r6 + eor r15,r7 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r24,r12 + eor r25,r13 + eor r16,r10 + eor r17,r11 + movw r6,r14 + movw r8,r18 + movw r10,r16 + movw r12,r24 + movw r24,r20 + movw r16,r22 + and r24,r2 + and r25,r3 + and r16,r4 + and r17,r5 + eor r24,r6 + eor r25,r7 + eor r16,r8 + eor r17,r9 + movw r18,r2 + movw r14,r4 + and r18,r24 + and r19,r25 + and r14,r16 + and r15,r17 + eor r18,r10 + eor r19,r11 + eor r14,r12 + eor r15,r13 + movw r10,r24 + movw r12,r16 + and r24,r18 + and r25,r19 + and r16,r14 + and r17,r15 + eor r24,r20 + eor r25,r21 + eor r16,r22 + eor r17,r23 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r20,r18 + movw r22,r14 + movw r2,r24 + movw r4,r16 + ret +533: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r14,Y+3 + ldd r15,Y+4 + ldd r24,Y+9 + ldd r25,Y+10 + ldd r16,Y+11 + ldd r17,Y+12 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std 
Y+1,r24 + std Y+2,r25 + std Y+3,r16 + std Y+4,r17 + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r14,Y+7 + ldd r15,Y+8 + ldd r24,Y+13 + ldd r25,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+5,r24 + std Y+6,r25 + std Y+7,r16 + std Y+8,r17 + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + ret +623: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r0,Y+1 + eor r20,r0 + ldd r0,Y+2 + eor r21,r0 + ldd r0,Y+3 + eor r22,r0 + ldd r0,Y+4 + eor r23,r0 + ldd r0,Y+5 + eor r2,r0 + ldd r0,Y+6 + eor r3,r0 + ldd r0,Y+7 + eor r4,r0 + ldd r0,Y+8 + eor r5,r0 + ldd r0,Y+9 + eor r6,r0 + ldd r0,Y+10 + eor r7,r0 + ldd r0,Y+11 + eor r8,r0 + ldd r0,Y+12 + eor r9,r0 + ldd r0,Y+13 + eor r10,r0 + ldd r0,Y+14 + eor r11,r0 + ldd r0,Y+15 + eor r12,r0 + ldd r0,Y+16 + eor r13,r0 + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_decrypt, .-clyde128_decrypt + + .text +.global shadow512 + .type shadow512, @function +shadow512: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std 
Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+60 + ldi r16,8 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 
1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+48 + ldi r16,8 + eor r18,r16 + std Z+48,r18 + ldd r18,Z+52 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + 
std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + rjmp 1795f +1083: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 
+ movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +1127: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 
+ movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1553: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + ldd r4,Z+48 + ldd r5,Z+49 + ldd r6,Z+50 + ldd r7,Z+51 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+32,r4 + std Z+33,r5 + std Z+34,r6 + std Z+35,r7 + std Z+48,r28 + std Z+49,r29 + std Z+50,r2 + std Z+51,r3 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + ldd r4,Z+52 + ldd r5,Z+53 + ldd r6,Z+54 + ldd r7,Z+55 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + std Z+36,r4 + std Z+37,r5 + std Z+38,r6 + std Z+39,r7 + std Z+52,r28 + std Z+53,r29 + std Z+54,r2 + std Z+55,r3 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+56 + ldd r5,Z+57 + ldd r6,Z+58 + ldd r7,Z+59 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + std Z+40,r4 + std Z+41,r5 + std Z+42,r6 + std Z+43,r7 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + std Z+60,r28 + std Z+61,r29 + std Z+62,r2 + std Z+63,r3 + ret +1795: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow512, .-shadow512 + + .text +.global shadow384 + .type shadow384, @function +shadow384: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd 
r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + rjmp 1430f +814: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +858: + mov r8,r19 + mov 
r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor 
r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1284: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+32,r22 + std Z+33,r23 + std Z+34,r26 + std Z+35,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+40,r22 + std Z+41,r23 + std Z+42,r26 + std Z+43,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+44,r22 + std Z+45,r23 + std Z+46,r26 + std Z+47,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ret +1430: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size 
shadow384, .-shadow384 + +#endif diff --git a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.c b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.c index 0e19216..068938b 100644 --- a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.c +++ b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.c @@ -22,6 +22,8 @@ #include "internal-spook.h" +#if !defined(__AVR__) + /** * \brief Number of steps in the Clyde-128 block cipher. * @@ -43,9 +45,9 @@ static uint8_t const rc[CLYDE128_STEPS][8] = { }; void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]) + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -154,9 +156,9 @@ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], } void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]) + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]) { uint32_t k0, k1, k2, k3; uint32_t t0, t1, t2, t3; @@ -555,3 +557,5 @@ void shadow384(shadow384_state_t *state) le_store_word32(state->B + 44, s23); #endif } + +#endif /* !__AVR__ */ diff --git a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.h b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.h index b08ce80..77c8b86 100644 --- a/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.h +++ b/spook/Implementations/crypto_aead/spook128su384v1/rhys/internal-spook.h @@ -93,31 +93,31 @@ typedef union * \brief Encrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to encrypt with. - * \param tweak Points to the tweak to encrypt with. * \param output Output buffer for the ciphertext. * \param input Input buffer for the plaintext. + * \param tweak Points to the tweak to encrypt with. * * \sa clyde128_decrypt() */ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const uint32_t input[CLYDE128_BLOCK_SIZE / 4]); + const uint32_t input[CLYDE128_BLOCK_SIZE / 4], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Decrypts a block with the Clyde-128 block cipher. * * \param key Points to the key to decrypt with. - * \param tweak Points to the tweak to decrypt with. * \param output Output buffer for the plaintext. * \param input Input buffer for the ciphertext. + * \param tweak Points to the tweak to decrypt with. * * \sa clyde128_encrypt() */ void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE], - const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4], uint32_t output[CLYDE128_BLOCK_SIZE / 4], - const unsigned char input[CLYDE128_BLOCK_SIZE]); + const unsigned char input[CLYDE128_BLOCK_SIZE], + const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]); /** * \brief Performs the Shadow-512 permutation on a state. 
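The header diff above settles the new Clyde-128 calling convention: the tweak argument moves from second position to last, after output and input, and clyde128_decrypt now takes its ciphertext input as a byte array. A minimal C sketch of a call under the new prototypes (the round-trip helper and buffer names are illustrative, not part of the patch):

#include <string.h>
#include <stdint.h>
#include "internal-spook.h"

/* Encrypt one block into ct, then decrypt it back into out, using the
 * reordered argument lists: key, output, input, tweak. Returns 0 if the
 * round trip recovers the plaintext. */
static int clyde128_roundtrip(const unsigned char key[CLYDE128_KEY_SIZE])
{
    uint32_t tweak[CLYDE128_TWEAK_SIZE / 4] = {0};
    uint32_t pt[CLYDE128_BLOCK_SIZE / 4] = {1, 2, 3, 4};
    uint32_t ct[CLYDE128_BLOCK_SIZE / 4];
    uint32_t out[CLYDE128_BLOCK_SIZE / 4];

    clyde128_encrypt(key, ct, pt, tweak);
    /* decrypt's ciphertext input is now a byte pointer */
    clyde128_decrypt(key, out, (const unsigned char *)ct, tweak);
    return memcmp(out, pt, CLYDE128_BLOCK_SIZE) == 0 ? 0 : -1;
}

The spook.c hunks below apply the same reordering at every call site; for example, the tag computation becomes clyde128_encrypt(k, state.W, state.W, state.W + 4), where output and input alias the first state block and the tweak is drawn from the second.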
diff --git a/spook/Implementations/crypto_aead/spook128su384v1/rhys/spook.c b/spook/Implementations/crypto_aead/spook128su384v1/rhys/spook.c index d075b33..2dbab94 100644 --- a/spook/Implementations/crypto_aead/spook128su384v1/rhys/spook.c +++ b/spook/Implementations/crypto_aead/spook128su384v1/rhys/spook.c @@ -86,7 +86,7 @@ static void spook_128_512_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 12, state->W + 4); + clyde128_encrypt(k, state->W + 12, state->W + 4, state->W); shadow512(state); } @@ -111,7 +111,7 @@ static void spook_128_384_init state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40; } memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE); - clyde128_encrypt(k, state->W, state->W + 8, state->W + 4); + clyde128_encrypt(k, state->W + 8, state->W + 4, state->W); shadow384(state); } @@ -310,7 +310,7 @@ int spook_128_512_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -345,7 +345,7 @@ int spook_128_512_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -377,7 +377,7 @@ int spook_128_384_su_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -412,7 +412,7 @@ int spook_128_384_su_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -444,7 +444,7 @@ int spook_128_512_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -479,7 +479,7 @@ int spook_128_512_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } @@ -511,7 +511,7 @@ int spook_128_384_mu_aead_encrypt /* Compute the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_encrypt(k, state.W + 4, state.W, state.W); + clyde128_encrypt(k, state.W, state.W, state.W + 4); memcpy(c + mlen, state.B, SPOOK_TAG_SIZE); return 0; } @@ -546,7 +546,7 @@ int spook_128_384_mu_aead_decrypt /* Check the authentication tag */ state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80; - clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen); + clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4); return aead_check_tag (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE); } diff --git 
a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook-avr.S b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook-avr.S new file mode 100644 index 0000000..05ca51e --- /dev/null +++ b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook-avr.S @@ -0,0 +1,4898 @@ +#if defined(__AVR__) +#include <avr/io.h> +/* Automatically generated - do not edit */ + + .text +.global clyde128_encrypt + .type clyde128_encrypt, @function +clyde128_encrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + eor r10,r18 + eor r11,r19 + eor r12,r14 + eor r13,r15 + rcall 151f + ldi r27,1 + eor r20,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 635f + rcall 151f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 151f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 635f + rjmp 725f +151: + movw r18,r20 + movw r14,r22 + and r18,r2 + and r19,r3 + and r14,r4 + and r15,r5 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + and r24,r20 + and r25,r21 + and r16,r22 + and r17,r23 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r10 + eor r7,r11 + eor r8,r12 + eor r9,r13 + and r10,r18 + and r11,r19 + and r12,r14 + and r13,r15 + eor r10,r20 + eor r11,r21 + eor r12,r22 + eor r13,r23 + movw r20,r24 + movw r22,r16 + movw r2,r18 + movw r4,r14 + mov r18,r21 + mov r19,r22 + mov r14,r23 + mov r15,r20 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr 
r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r3 + mov r25,r4 + mov r16,r5 + mov r17,r2 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor r17,r0 + mov r0,r22 + mov r22,r20 + mov r20,r0 + mov r0,r23 + mov r23,r21 + mov r21,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + mov r0,r4 + mov r4,r2 + mov r2,r0 + mov r0,r5 + mov r5,r3 + mov r3,r0 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r2,r24 + eor r3,r25 + eor r4,r16 + eor r5,r17 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r20,r14 + eor r21,r15 + eor r22,r18 + eor r23,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r2,r16 + eor r3,r17 + eor r4,r24 + eor r5,r25 + mov r18,r7 + mov r19,r8 + mov r14,r9 + mov r15,r6 + eor r0,r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + lsr r15 + ror r14 + ror r19 + ror r18 + ror r0 + or r15,r0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r11 + mov r25,r12 + mov r16,r13 + mov r17,r10 + eor r0,r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r18,r26 + eor r19,r27 + eor r14,r1 + eor r15,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r24,r26 + eor r25,r27 + eor r16,r1 + eor 
r17,r0 + mov r0,r8 + mov r8,r6 + mov r6,r0 + mov r0,r9 + mov r9,r7 + mov r7,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r6,r18 + eor r7,r19 + eor r8,r14 + eor r9,r15 + mov r0,r12 + mov r12,r10 + mov r10,r0 + mov r0,r13 + mov r13,r11 + mov r11,r0 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r19,7 + lsl r14 + rol r15 + rol r18 + rol r19 + bld r14,0 + eor r6,r14 + eor r7,r15 + eor r8,r18 + eor r9,r19 + bst r25,7 + lsl r16 + rol r17 + rol r24 + rol r25 + bld r16,0 + eor r10,r16 + eor r11,r17 + eor r12,r24 + eor r13,r25 + ret +635: + ldd r18,Y+9 + ldd r19,Y+10 + ldd r14,Y+11 + ldd r15,Y+12 + ldd r24,Y+1 + ldd r25,Y+2 + ldd r16,Y+3 + ldd r17,Y+4 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+9,r24 + std Y+10,r25 + std Y+11,r16 + std Y+12,r17 + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + ldd r18,Y+13 + ldd r19,Y+14 + ldd r14,Y+15 + ldd r15,Y+16 + ldd r24,Y+5 + ldd r25,Y+6 + ldd r16,Y+7 + ldd r17,Y+8 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+13,r24 + std Y+14,r25 + std Y+15,r16 + std Y+16,r17 + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ret +725: + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_encrypt, .-clyde128_encrypt + + .text +.global clyde128_decrypt + .type clyde128_decrypt, @function +clyde128_decrypt: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r23 + push r22 + movw r30,r24 + movw r26,r20 + in r28,0x3d + in r29,0x3e + sbiw r28,16 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 36 + ld r20,X+ + ld r21,X+ + ld 
r22,X+ + ld r23,X+ + ld r2,X+ + ld r3,X+ + ld r4,X+ + ld r5,X+ + ld r6,X+ + ld r7,X+ + ld r8,X+ + ld r9,X+ + ld r10,X+ + ld r11,X+ + ld r12,X+ + ld r13,X+ + movw r26,r18 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+1,r18 + std Y+2,r19 + std Y+3,r14 + std Y+4,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+5,r18 + std Y+6,r19 + std Y+7,r14 + std Y+8,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ld r18,X+ + ld r19,X+ + ld r14,X+ + ld r15,X+ + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r20,r27 + eor r2,r27 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + eor r10,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + eor r6,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + eor r2,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r10,r27 + rcall 103f + ldi r27,1 + eor r6,r27 + rcall 103f + rcall 533f + ldi r27,1 + eor r2,r27 + rcall 103f + ldi r27,1 + eor r20,r27 + rcall 103f + rjmp 623f +103: + mov r18,r23 + mov r19,r20 + mov r14,r21 + mov r15,r22 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + mov r24,r5 + mov r25,r2 + mov r16,r3 + mov r17,r4 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r20,r15 + eor r21,r18 + eor r22,r19 + eor r23,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r2,r17 + eor r3,r24 + eor r4,r25 + eor r5,r16 + movw r18,r20 + movw r14,r22 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r20 + eor r19,r21 + eor r14,r22 + eor r15,r23 + movw r24,r2 + movw r16,r4 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r2 + eor r25,r3 + eor r16,r4 + eor r17,r5 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r20,r26 + eor r21,r27 + eor r22,r1 + eor r23,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r2,r26 + eor r3,r27 + eor r4,r1 + eor r5,r0 + bst r20,0 + lsr r23 + ror r22 + ror r21 + ror r20 + bld r23,7 + eor r18,r22 + eor r19,r23 + eor r14,r20 + eor r15,r21 + bst r2,0 + lsr r5 + ror r4 + ror r3 + ror r2 + bld r5,7 + eor r24,r4 + eor r25,r5 + eor r16,r2 + eor r17,r3 + movw r20,r14 + movw r22,r18 + movw r2,r16 + movw r4,r24 + mov 
r18,r9 + mov r19,r6 + mov r14,r7 + mov r15,r8 + bst r18,0 + lsr r15 + ror r14 + ror r19 + ror r18 + bld r15,7 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + mov r24,r13 + mov r25,r10 + mov r16,r11 + mov r17,r12 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + movw r26,r18 + mov r1,r14 + mov r0,r15 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + movw r26,r24 + mov r1,r16 + mov r0,r17 + bst r0,7 + lsl r26 + rol r27 + rol r1 + rol r0 + bld r26,0 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r6,r15 + eor r7,r18 + eor r8,r19 + eor r9,r14 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r10,r17 + eor r11,r24 + eor r12,r25 + eor r13,r16 + movw r18,r6 + movw r14,r8 + bst r15,7 + lsl r18 + rol r19 + rol r14 + rol r15 + bld r18,0 + eor r18,r6 + eor r19,r7 + eor r14,r8 + eor r15,r9 + movw r24,r10 + movw r16,r12 + bst r17,7 + lsl r24 + rol r25 + rol r16 + rol r17 + bld r24,0 + eor r24,r10 + eor r25,r11 + eor r16,r12 + eor r17,r13 + mov r26,r17 + mov r27,r24 + mov r1,r25 + mov r0,r16 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r6,r26 + eor r7,r27 + eor r8,r1 + eor r9,r0 + mov r26,r15 + mov r27,r18 + mov r1,r19 + mov r0,r14 + bst r26,0 + lsr r0 + ror r1 + ror r27 + ror r26 + bld r0,7 + eor r10,r26 + eor r11,r27 + eor r12,r1 + eor r13,r0 + bst r6,0 + lsr r9 + ror r8 + ror r7 + ror r6 + bld r9,7 + eor r18,r8 + eor r19,r9 + eor r14,r6 + eor r15,r7 + bst r10,0 + lsr r13 + ror r12 + ror r11 + ror r10 + bld r13,7 + eor r24,r12 + eor r25,r13 + eor r16,r10 + eor r17,r11 + movw r6,r14 + movw r8,r18 + movw r10,r16 + movw r12,r24 + movw r24,r20 + movw r16,r22 + and r24,r2 + and r25,r3 + and r16,r4 + and r17,r5 + eor r24,r6 + eor r25,r7 + eor r16,r8 + eor r17,r9 + movw r18,r2 + movw r14,r4 + and r18,r24 + and r19,r25 + and r14,r16 + and r15,r17 + eor r18,r10 + eor r19,r11 + eor r14,r12 + eor r15,r13 + movw r10,r24 + movw r12,r16 + and r24,r18 + and r25,r19 + and r16,r14 + and r17,r15 + eor r24,r20 + eor r25,r21 + eor r16,r22 + eor r17,r23 + movw r6,r18 + movw r8,r14 + and r6,r24 + and r7,r25 + and r8,r16 + and r9,r17 + eor r6,r2 + eor r7,r3 + eor r8,r4 + eor r9,r5 + movw r20,r18 + movw r22,r14 + movw r2,r24 + movw r4,r16 + ret +533: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r18,Y+1 + ldd r19,Y+2 + ldd r14,Y+3 + ldd r15,Y+4 + ldd r24,Y+9 + ldd r25,Y+10 + ldd r16,Y+11 + ldd r17,Y+12 + eor r20,r18 + eor r21,r19 + eor r22,r14 + eor r23,r15 + eor r6,r24 + eor r7,r25 + eor r8,r16 + eor r9,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std 
Y+1,r24 + std Y+2,r25 + std Y+3,r16 + std Y+4,r17 + std Y+9,r18 + std Y+10,r19 + std Y+11,r14 + std Y+12,r15 + ldd r18,Y+5 + ldd r19,Y+6 + ldd r14,Y+7 + ldd r15,Y+8 + ldd r24,Y+13 + ldd r25,Y+14 + ldd r16,Y+15 + ldd r17,Y+16 + eor r2,r18 + eor r3,r19 + eor r4,r14 + eor r5,r15 + eor r10,r24 + eor r11,r25 + eor r12,r16 + eor r13,r17 + eor r18,r24 + eor r19,r25 + eor r14,r16 + eor r15,r17 + std Y+5,r24 + std Y+6,r25 + std Y+7,r16 + std Y+8,r17 + std Y+13,r18 + std Y+14,r19 + std Y+15,r14 + std Y+16,r15 + ret +623: + ld r0,Z + eor r20,r0 + ldd r0,Z+1 + eor r21,r0 + ldd r0,Z+2 + eor r22,r0 + ldd r0,Z+3 + eor r23,r0 + ldd r0,Z+4 + eor r2,r0 + ldd r0,Z+5 + eor r3,r0 + ldd r0,Z+6 + eor r4,r0 + ldd r0,Z+7 + eor r5,r0 + ldd r0,Z+8 + eor r6,r0 + ldd r0,Z+9 + eor r7,r0 + ldd r0,Z+10 + eor r8,r0 + ldd r0,Z+11 + eor r9,r0 + ldd r0,Z+12 + eor r10,r0 + ldd r0,Z+13 + eor r11,r0 + ldd r0,Z+14 + eor r12,r0 + ldd r0,Z+15 + eor r13,r0 + ldd r0,Y+1 + eor r20,r0 + ldd r0,Y+2 + eor r21,r0 + ldd r0,Y+3 + eor r22,r0 + ldd r0,Y+4 + eor r23,r0 + ldd r0,Y+5 + eor r2,r0 + ldd r0,Y+6 + eor r3,r0 + ldd r0,Y+7 + eor r4,r0 + ldd r0,Y+8 + eor r5,r0 + ldd r0,Y+9 + eor r6,r0 + ldd r0,Y+10 + eor r7,r0 + ldd r0,Y+11 + eor r8,r0 + ldd r0,Y+12 + eor r9,r0 + ldd r0,Y+13 + eor r10,r0 + ldd r0,Y+14 + eor r11,r0 + ldd r0,Y+15 + eor r12,r0 + ldd r0,Y+16 + eor r13,r0 + ldd r26,Y+17 + ldd r27,Y+18 + st X+,r20 + st X+,r21 + st X+,r22 + st X+,r23 + st X+,r2 + st X+,r3 + st X+,r4 + st X+,r5 + st X+,r6 + st X+,r7 + st X+,r8 + st X+,r9 + st X+,r10 + st X+,r11 + st X+,r12 + st X+,r13 + adiw r28,18 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + eor r1,r1 + ret + .size clyde128_decrypt, .-clyde128_decrypt + + .text +.global shadow512 + .type shadow512, @function +shadow512: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std 
Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+60 + ldi r16,8 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 
1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r28,r25 + eor r4,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+48 + ldi r16,8 + eor r18,r16 + std Z+48,r18 + ldd r18,Z+52 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + 
std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 1083f + rcall 1127f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 1083f + rcall 1127f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 1083f + rcall 1127f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + ldd r18,Z+48 + ldd r19,Z+49 + ldd r20,Z+50 + ldd r21,Z+51 + ldd r22,Z+52 + ldd r23,Z+53 + ldd r26,Z+54 + ldd r27,Z+55 + ldd r28,Z+56 + ldd r29,Z+57 + ldd r2,Z+58 + ldd r3,Z+59 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + rcall 1083f + rcall 1127f + ldi r25,8 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 1083f + std Z+48,r18 + std Z+49,r19 + std Z+50,r20 + std Z+51,r21 + std Z+52,r22 + std Z+53,r23 + std Z+54,r26 + std Z+55,r27 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + std Z+60,r4 + std Z+61,r5 + std Z+62,r6 + std Z+63,r7 + rcall 1553f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ldd r18,Z+52 + ldi r16,8 + eor r18,r16 + std Z+52,r18 + ldd r18,Z+56 + eor r18,r16 + std Z+56,r18 + ldd r18,Z+60 + eor r18,r16 + std Z+60,r18 + rjmp 1795f +1083: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 
+ movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +1127: + mov r8,r19 + mov r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 
+ movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1553: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + ldd r4,Z+48 + ldd r5,Z+49 + ldd r6,Z+50 + ldd r7,Z+51 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+32,r4 + std Z+33,r5 + std Z+34,r6 + std Z+35,r7 + std Z+48,r28 + std Z+49,r29 + std Z+50,r2 + std Z+51,r3 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + ldd r4,Z+52 + ldd r5,Z+53 + ldd r6,Z+54 + ldd r7,Z+55 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + std Z+36,r4 + std Z+37,r5 + std Z+38,r6 + std Z+39,r7 + std Z+52,r28 + std Z+53,r29 + std Z+54,r2 + std Z+55,r3 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd 
r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+56 + ldd r5,Z+57 + ldd r6,Z+58 + ldd r7,Z+59 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + std Z+40,r4 + std Z+41,r5 + std Z+42,r6 + std Z+43,r7 + std Z+56,r28 + std Z+57,r29 + std Z+58,r2 + std Z+59,r3 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + ldd r4,Z+60 + ldd r5,Z+61 + ldd r6,Z+62 + ldd r7,Z+63 + movw r8,r18 + movw r10,r20 + eor r8,r22 + eor r9,r23 + eor r10,r26 + eor r11,r27 + movw r12,r28 + movw r14,r2 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + eor r18,r12 + eor r19,r13 + eor r20,r14 + eor r21,r15 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + eor r4,r8 + eor r5,r9 + eor r6,r10 + eor r7,r11 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + std Z+60,r28 + std Z+61,r29 + std Z+62,r2 + std Z+63,r3 + ret +1795: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size shadow512, .-shadow512 + + .text +.global shadow384 + .type shadow384, @function +shadow384: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 +.L__stack_usage = 18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+12 + ldi r25,1 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+28 + ldi r24,2 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+44 + ldi r17,4 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std 
Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r28,r25 + eor r4,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r28,r25 + eor r4,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ld r18,Z + ldi r25,1 + eor r18,r25 + st Z,r18 + ldd r18,Z+4 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+16 + ldi r24,2 + eor r18,r24 + std Z+16,r18 + ldd r18,Z+20 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+32 + ldi r17,4 + eor r18,r17 + std Z+32,r18 + ldd r18,Z+36 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd 
r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+4 + ldd r23,Z+5 + ldd r26,Z+6 + ldd r27,Z+7 + ldd r28,Z+8 + ldd r29,Z+9 + ldd r2,Z+10 + ldd r3,Z+11 + ldd r4,Z+12 + ldd r5,Z+13 + ldd r6,Z+14 + ldd r7,Z+15 + rcall 814f + rcall 858f + ldi r25,1 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + st Z,r18 + std Z+1,r19 + std Z+2,r20 + std Z+3,r21 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + std Z+8,r28 + std Z+9,r29 + std Z+10,r2 + std Z+11,r3 + std Z+12,r4 + std Z+13,r5 + std Z+14,r6 + std Z+15,r7 + ldd r18,Z+16 + ldd r19,Z+17 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+24 + ldd r29,Z+25 + ldd r2,Z+26 + ldd r3,Z+27 + ldd r4,Z+28 + ldd r5,Z+29 + ldd r6,Z+30 + ldd r7,Z+31 + rcall 814f + rcall 858f + ldi r25,2 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + std Z+21,r23 + std Z+22,r26 + std Z+23,r27 + std Z+24,r28 + std Z+25,r29 + std Z+26,r2 + std Z+27,r3 + std Z+28,r4 + std Z+29,r5 + std Z+30,r6 + std Z+31,r7 + ldd r18,Z+32 + ldd r19,Z+33 + ldd r20,Z+34 + ldd r21,Z+35 + ldd r22,Z+36 + ldd r23,Z+37 + ldd r26,Z+38 + ldd r27,Z+39 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + ldd r4,Z+44 + ldd r5,Z+45 + ldd r6,Z+46 + ldd r7,Z+47 + rcall 814f + rcall 858f + ldi r25,4 + eor r18,r25 + eor r22,r25 + eor r28,r25 + rcall 814f + std Z+32,r18 + std Z+33,r19 + std Z+34,r20 + std Z+35,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + std Z+40,r28 + std Z+41,r29 + std Z+42,r2 + std Z+43,r3 + std Z+44,r4 + std Z+45,r5 + std Z+46,r6 + std Z+47,r7 + rcall 1284f + ldd r18,Z+4 + ldi r25,1 + eor r18,r25 + std Z+4,r18 + ldd r18,Z+8 + eor r18,r25 + std Z+8,r18 + ldd r18,Z+12 + eor r18,r25 + std Z+12,r18 + ldd r18,Z+20 + ldi r24,2 + eor r18,r24 + std Z+20,r18 + ldd r18,Z+24 + eor r18,r24 + std Z+24,r18 + ldd r18,Z+28 + eor r18,r24 + std Z+28,r18 + ldd r18,Z+36 + ldi r17,4 + eor r18,r17 + std Z+36,r18 + ldd r18,Z+40 + eor r18,r17 + std Z+40,r18 + ldd r18,Z+44 + eor r18,r17 + std Z+44,r18 + rjmp 1430f +814: + movw r8,r18 + movw r10,r20 + and r8,r22 + and r9,r23 + and r10,r26 + and r11,r27 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + and r12,r18 + and r13,r19 + and r14,r20 + and r15,r21 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r28,r8 + movw r2,r10 + and r28,r12 + and r29,r13 + and r2,r14 + and r3,r15 + eor r28,r4 + eor r29,r5 + eor r2,r6 + eor r3,r7 + and r4,r8 + and r5,r9 + and r6,r10 + and r7,r11 + eor r4,r18 + eor r5,r19 + eor r6,r20 + eor r7,r21 + movw r18,r12 + movw r20,r14 + movw r22,r8 + movw r26,r10 + ret +858: + mov r8,r19 + mov 
r9,r20 + mov r10,r21 + mov r11,r18 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + mov r12,r23 + mov r13,r26 + mov r14,r27 + mov r15,r22 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r20 + mov r20,r18 + mov r18,r0 + mov r0,r21 + mov r21,r19 + mov r19,r0 + bst r18,0 + lsr r21 + ror r20 + ror r19 + ror r18 + bld r21,7 + eor r18,r8 + eor r19,r9 + eor r20,r10 + eor r21,r11 + mov r0,r26 + mov r26,r22 + mov r22,r0 + mov r0,r27 + mov r27,r23 + mov r23,r0 + bst r22,0 + lsr r27 + ror r26 + ror r23 + ror r22 + bld r27,7 + eor r22,r12 + eor r23,r13 + eor r26,r14 + eor r27,r15 + movw r8,r18 + movw r10,r20 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r18 + eor r9,r19 + eor r10,r20 + eor r11,r21 + movw r12,r22 + movw r14,r26 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r22 + eor r13,r23 + eor r14,r26 + eor r15,r27 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r18,r24 + eor r19,r25 + eor r20,r16 + eor r21,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r22,r24 + eor r23,r25 + eor r26,r16 + eor r27,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r18,r10 + eor r19,r11 + eor r20,r8 + eor r21,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r22,r14 + eor r23,r15 + eor r26,r12 + eor r27,r13 + mov r8,r29 + mov r9,r2 + mov r10,r3 + mov r11,r28 + mov r0,r1 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r0 + or r11,r0 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + mov r12,r5 + mov r13,r6 + mov r14,r7 + mov r15,r4 + mov r0,r1 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + lsr r15 + ror r14 + ror r13 + ror r12 + ror r0 + or r15,r0 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + movw r24,r8 + movw r16,r10 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r8,r24 + eor r9,r25 + eor r10,r16 + eor r11,r17 + movw r24,r12 + movw r16,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r12,r24 + eor 
r13,r25 + eor r14,r16 + eor r15,r17 + mov r0,r2 + mov r2,r28 + mov r28,r0 + mov r0,r3 + mov r3,r29 + mov r29,r0 + bst r28,0 + lsr r3 + ror r2 + ror r29 + ror r28 + bld r3,7 + eor r28,r8 + eor r29,r9 + eor r2,r10 + eor r3,r11 + mov r0,r6 + mov r6,r4 + mov r4,r0 + mov r0,r7 + mov r7,r5 + mov r5,r0 + bst r4,0 + lsr r7 + ror r6 + ror r5 + ror r4 + bld r7,7 + eor r4,r12 + eor r5,r13 + eor r6,r14 + eor r7,r15 + movw r8,r28 + movw r10,r2 + lsl r8 + rol r9 + rol r10 + rol r11 + adc r8,r1 + eor r8,r28 + eor r9,r29 + eor r10,r2 + eor r11,r3 + movw r12,r4 + movw r14,r6 + lsl r12 + rol r13 + rol r14 + rol r15 + adc r12,r1 + eor r12,r4 + eor r13,r5 + eor r14,r6 + eor r15,r7 + mov r24,r15 + mov r25,r12 + mov r16,r13 + mov r17,r14 + mov r0,r1 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r0 + or r17,r0 + eor r28,r24 + eor r29,r25 + eor r2,r16 + eor r3,r17 + mov r24,r11 + mov r25,r8 + mov r16,r9 + mov r17,r10 + bst r24,0 + lsr r17 + ror r16 + ror r25 + ror r24 + bld r17,7 + eor r4,r24 + eor r5,r25 + eor r6,r16 + eor r7,r17 + lsl r10 + rol r11 + rol r8 + rol r9 + adc r10,r1 + eor r28,r10 + eor r29,r11 + eor r2,r8 + eor r3,r9 + lsl r14 + rol r15 + rol r12 + rol r13 + adc r14,r1 + eor r4,r14 + eor r5,r15 + eor r6,r12 + eor r7,r13 + ret +1284: + ld r18,Z + ldd r19,Z+1 + ldd r20,Z+2 + ldd r21,Z+3 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r28,Z+32 + ldd r29,Z+33 + ldd r2,Z+34 + ldd r3,Z+35 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+32,r22 + std Z+33,r23 + std Z+34,r26 + std Z+35,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + st Z,r22 + std Z+1,r23 + std Z+2,r26 + std Z+3,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+16,r18 + std Z+17,r19 + std Z+18,r20 + std Z+19,r21 + ldd r18,Z+4 + ldd r19,Z+5 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+20 + ldd r23,Z+21 + ldd r26,Z+22 + ldd r27,Z+23 + ldd r28,Z+36 + ldd r29,Z+37 + ldd r2,Z+38 + ldd r3,Z+39 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+36,r22 + std Z+37,r23 + std Z+38,r26 + std Z+39,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+4,r22 + std Z+5,r23 + std Z+6,r26 + std Z+7,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+20,r18 + std Z+21,r19 + std Z+22,r20 + std Z+23,r21 + ldd r18,Z+8 + ldd r19,Z+9 + ldd r20,Z+10 + ldd r21,Z+11 + ldd r22,Z+24 + ldd r23,Z+25 + ldd r26,Z+26 + ldd r27,Z+27 + ldd r28,Z+40 + ldd r29,Z+41 + ldd r2,Z+42 + ldd r3,Z+43 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+40,r22 + std Z+41,r23 + std Z+42,r26 + std Z+43,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+8,r22 + std Z+9,r23 + std Z+10,r26 + std Z+11,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+24,r18 + std Z+25,r19 + std Z+26,r20 + std Z+27,r21 + ldd r18,Z+12 + ldd r19,Z+13 + ldd r20,Z+14 + ldd r21,Z+15 + ldd r22,Z+28 + ldd r23,Z+29 + ldd r26,Z+30 + ldd r27,Z+31 + ldd r28,Z+44 + ldd r29,Z+45 + ldd r2,Z+46 + ldd r3,Z+47 + eor r22,r18 + eor r23,r19 + eor r26,r20 + eor r27,r21 + std Z+44,r22 + std Z+45,r23 + std Z+46,r26 + std Z+47,r27 + eor r22,r28 + eor r23,r29 + eor r26,r2 + eor r27,r3 + std Z+12,r22 + std Z+13,r23 + std Z+14,r26 + std Z+15,r27 + eor r18,r28 + eor r19,r29 + eor r20,r2 + eor r21,r3 + std Z+28,r18 + std Z+29,r19 + std Z+30,r20 + std Z+31,r21 + ret +1430: + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size 
shadow384, .-shadow384
+
+#endif
diff --git a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.c b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.c
index 0e19216..068938b 100644
--- a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.c
+++ b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.c
@@ -22,6 +22,8 @@
 
 #include "internal-spook.h"
 
+#if !defined(__AVR__)
+
 /**
  * \brief Number of steps in the Clyde-128 block cipher.
  *
@@ -43,9 +45,9 @@ static uint8_t const rc[CLYDE128_STEPS][8] = {
 };
 
 void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE],
-                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4],
                       uint32_t output[CLYDE128_BLOCK_SIZE / 4],
-                      const uint32_t input[CLYDE128_BLOCK_SIZE / 4])
+                      const uint32_t input[CLYDE128_BLOCK_SIZE / 4],
+                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4])
 {
     uint32_t k0, k1, k2, k3;
     uint32_t t0, t1, t2, t3;
@@ -154,9 +156,9 @@ void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE],
 }
 
 void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE],
-                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4],
                       uint32_t output[CLYDE128_BLOCK_SIZE / 4],
-                      const unsigned char input[CLYDE128_BLOCK_SIZE])
+                      const unsigned char input[CLYDE128_BLOCK_SIZE],
+                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4])
 {
     uint32_t k0, k1, k2, k3;
     uint32_t t0, t1, t2, t3;
@@ -555,3 +557,5 @@ void shadow384(shadow384_state_t *state)
     le_store_word32(state->B + 44, s23);
 #endif
 }
+
+#endif /* !__AVR__ */
diff --git a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.h b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.h
index b08ce80..77c8b86 100644
--- a/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.h
+++ b/spook/Implementations/crypto_aead/spook128su512v1/rhys/internal-spook.h
@@ -93,31 +93,31 @@ typedef union
  * \brief Encrypts a block with the Clyde-128 block cipher.
  *
  * \param key Points to the key to encrypt with.
- * \param tweak Points to the tweak to encrypt with.
  * \param output Output buffer for the ciphertext.
  * \param input Input buffer for the plaintext.
+ * \param tweak Points to the tweak to encrypt with.
  *
  * \sa clyde128_decrypt()
  */
 void clyde128_encrypt(const unsigned char key[CLYDE128_KEY_SIZE],
-                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4],
                       uint32_t output[CLYDE128_BLOCK_SIZE / 4],
-                      const uint32_t input[CLYDE128_BLOCK_SIZE / 4]);
+                      const uint32_t input[CLYDE128_BLOCK_SIZE / 4],
+                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]);
 
 /**
  * \brief Decrypts a block with the Clyde-128 block cipher.
  *
  * \param key Points to the key to decrypt with.
- * \param tweak Points to the tweak to decrypt with.
  * \param output Output buffer for the plaintext.
  * \param input Input buffer for the ciphertext.
+ * \param tweak Points to the tweak to decrypt with.
 *
 * \sa clyde128_encrypt()
 */
 void clyde128_decrypt(const unsigned char key[CLYDE128_KEY_SIZE],
-                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4],
                       uint32_t output[CLYDE128_BLOCK_SIZE / 4],
-                      const unsigned char input[CLYDE128_BLOCK_SIZE]);
+                      const unsigned char input[CLYDE128_BLOCK_SIZE],
+                      const uint32_t tweak[CLYDE128_TWEAK_SIZE / 4]);
 
 /**
  * \brief Performs the Shadow-512 permutation on a state.
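The two files above change the Clyde-128 entry points from the old argument order (key, tweak, output, input) to (key, output, input, tweak); the spook.c hunks that follow update every call site to match. As a quick reference, here is a minimal round-trip sketch of the new calling convention. It assumes only the internal-spook.h prototypes shown above; the function name and the zero-filled key/tweak are illustrative and not part of the patch:

    #include <stdint.h>
    #include "internal-spook.h"

    /* Hypothetical helper: encrypt one block and decrypt it again
     * using the reordered (key, output, input, tweak) convention. */
    static void clyde128_roundtrip_sketch(void)
    {
        unsigned char key[CLYDE128_KEY_SIZE] = {0};    /* placeholder key */
        uint32_t tweak[CLYDE128_TWEAK_SIZE / 4] = {0}; /* placeholder tweak */
        uint32_t pt[CLYDE128_BLOCK_SIZE / 4] = {1, 2, 3, 4};
        uint32_t ct[CLYDE128_BLOCK_SIZE / 4];
        uint32_t rt[CLYDE128_BLOCK_SIZE / 4];

        clyde128_encrypt(key, ct, pt, tweak);
        /* clyde128_decrypt takes its input as bytes, hence the cast */
        clyde128_decrypt(key, rt, (const unsigned char *)ct, tweak);
        /* rt now holds the original pt words */
    }

Putting the tweak last lets call sites that reuse state words as the tweak, such as the tag computations below, read uniformly as key, output, input, then tweak.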
diff --git a/spook/Implementations/crypto_aead/spook128su512v1/rhys/spook.c b/spook/Implementations/crypto_aead/spook128su512v1/rhys/spook.c
index d075b33..2dbab94 100644
--- a/spook/Implementations/crypto_aead/spook128su512v1/rhys/spook.c
+++ b/spook/Implementations/crypto_aead/spook128su512v1/rhys/spook.c
@@ -86,7 +86,7 @@ static void spook_128_512_init
         state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40;
     }
     memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE);
-    clyde128_encrypt(k, state->W, state->W + 12, state->W + 4);
+    clyde128_encrypt(k, state->W + 12, state->W + 4, state->W);
     shadow512(state);
 }
 
@@ -111,7 +111,7 @@ static void spook_128_384_init
         state->B[CLYDE128_BLOCK_SIZE - 1] |= 0x40;
     }
     memcpy(state->B + CLYDE128_BLOCK_SIZE, npub, CLYDE128_BLOCK_SIZE);
-    clyde128_encrypt(k, state->W, state->W + 8, state->W + 4);
+    clyde128_encrypt(k, state->W + 8, state->W + 4, state->W);
     shadow384(state);
 }
 
@@ -310,7 +310,7 @@ int spook_128_512_su_aead_encrypt
 
     /* Compute the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_encrypt(k, state.W + 4, state.W, state.W);
+    clyde128_encrypt(k, state.W, state.W, state.W + 4);
     memcpy(c + mlen, state.B, SPOOK_TAG_SIZE);
     return 0;
 }
@@ -345,7 +345,7 @@ int spook_128_512_su_aead_decrypt
 
     /* Check the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen);
+    clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4);
     return aead_check_tag
         (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE);
 }
@@ -377,7 +377,7 @@ int spook_128_384_su_aead_encrypt
 
     /* Compute the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_encrypt(k, state.W + 4, state.W, state.W);
+    clyde128_encrypt(k, state.W, state.W, state.W + 4);
     memcpy(c + mlen, state.B, SPOOK_TAG_SIZE);
     return 0;
 }
@@ -412,7 +412,7 @@ int spook_128_384_su_aead_decrypt
 
     /* Check the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen);
+    clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4);
     return aead_check_tag
         (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE);
 }
@@ -444,7 +444,7 @@ int spook_128_512_mu_aead_encrypt
 
     /* Compute the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_encrypt(k, state.W + 4, state.W, state.W);
+    clyde128_encrypt(k, state.W, state.W, state.W + 4);
     memcpy(c + mlen, state.B, SPOOK_TAG_SIZE);
     return 0;
 }
@@ -479,7 +479,7 @@ int spook_128_512_mu_aead_decrypt
 
     /* Check the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen);
+    clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4);
     return aead_check_tag
         (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE);
 }
@@ -511,7 +511,7 @@ int spook_128_384_mu_aead_encrypt
 
     /* Compute the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_encrypt(k, state.W + 4, state.W, state.W);
+    clyde128_encrypt(k, state.W, state.W, state.W + 4);
     memcpy(c + mlen, state.B, SPOOK_TAG_SIZE);
     return 0;
 }
@@ -546,7 +546,7 @@ int spook_128_384_mu_aead_decrypt
 
     /* Check the authentication tag */
     state.B[CLYDE128_BLOCK_SIZE * 2 - 1] |= 0x80;
-    clyde128_decrypt(k, state.W + 4, state.W + 4, c + clen);
+    clyde128_decrypt(k, state.W + 4, c + clen, state.W + 4);
     return aead_check_tag
         (m, clen, state.B, state.B + CLYDE128_BLOCK_SIZE, SPOOK_TAG_SIZE);
 }
diff --git 
a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean-avr.S b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean-avr.S new file mode 100644 index 0000000..6380870 --- /dev/null +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean-avr.S @@ -0,0 +1,1632 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global subterranean_round + .type subterranean_round, @function +subterranean_round: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r20,Z + ldd r21,Z+1 + ldd r22,Z+2 + ldd r23,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r2,Z+6 + ldd r3,Z+7 + mov r18,r20 + lsl r18 + ldd r0,Z+32 + eor r18,r0 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + ldi r25,1 + eor r20,r25 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + std Y+4,r23 + std Y+5,r26 + std Y+6,r27 + std Y+7,r2 + mov r20,r3 + ldd r21,Z+8 + ldd r22,Z+9 + ldd r23,Z+10 + ldd r26,Z+11 + ldd r27,Z+12 + ldd r2,Z+13 + ldd r3,Z+14 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+8,r20 + std Y+9,r21 + std Y+10,r22 + std Y+11,r23 + std Y+12,r26 + std Y+13,r27 + std Y+14,r2 + mov r20,r3 + ldd r21,Z+15 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r2,Z+20 + ldd r3,Z+21 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+15,r20 + std Y+16,r21 + std Y+17,r22 + std Y+18,r23 + std Y+19,r26 + std Y+20,r27 + std Y+21,r2 + mov r20,r3 + ldd r21,Z+22 + ldd r22,Z+23 + ldd r23,Z+24 + ldd r26,Z+25 + ldd r27,Z+26 + ldd r2,Z+27 + ldd r3,Z+28 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com 
r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+22,r20 + std Y+23,r21 + std Y+24,r22 + std Y+25,r23 + std Y+26,r26 + std Y+27,r27 + std Y+28,r2 + mov r20,r3 + ldd r21,Z+29 + ldd r22,Z+30 + ldd r23,Z+31 + mov r26,r18 + movw r4,r20 + movw r6,r22 + mov r8,r26 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + mov r24,r8 + lsr r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + std Y+29,r20 + std Y+30,r21 + std Y+31,r22 + std Y+32,r23 + mov r20,r18 + lsr r20 + mov r21,r20 + lsr r21 + com r20 + and r20,r21 + eor r18,r20 + andi r18,1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r2,Y+7 + ldd r3,Y+8 + movw r4,r20 + lsl r4 + rol r5 + eor r18,r4 + mov r19,r5 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + std Y+4,r23 + std Y+5,r26 + std Y+6,r27 + std Y+7,r2 + mov r20,r3 + ldd r21,Y+9 + ldd r22,Y+10 + ldd r23,Y+11 + ldd r26,Y+12 + ldd r27,Y+13 + ldd r2,Y+14 + ldd r3,Y+15 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+8,r20 + std Y+9,r21 + std Y+10,r22 + std Y+11,r23 + std Y+12,r26 + std Y+13,r27 + std Y+14,r2 + mov r20,r3 + ldd r21,Y+16 + ldd r22,Y+17 + ldd r23,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r2,Y+21 + ldd r3,Y+22 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+15,r20 + std Y+16,r21 + std Y+17,r22 + std Y+18,r23 + std Y+19,r26 + std Y+20,r27 + std Y+21,r2 + mov r20,r3 + ldd r21,Y+23 + ldd r22,Y+24 + ldd r23,Y+25 + ldd r26,Y+26 + ldd r27,Y+27 + ldd r2,Y+28 + ldd r3,Y+29 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+22,r20 + std Y+23,r21 + 
std Y+24,r22 + std Y+25,r23 + std Y+26,r26 + std Y+27,r27 + std Y+28,r2 + mov r20,r3 + ldd r21,Y+30 + ldd r22,Y+31 + ldd r23,Y+32 + mov r26,r18 + movw r4,r20 + movw r6,r22 + mov r8,r26 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + std Y+29,r20 + std Y+30,r21 + std Y+31,r22 + std Y+32,r23 + mov r20,r18 + lsr r20 + lsr r20 + lsr r20 + eor r18,r19 + eor r18,r20 + ldd r17,Y+1 + bst r17,0 + bld r20,0 + bst r17,1 + bld r14,6 + bst r17,2 + bld r27,3 + bst r17,4 + bld r6,6 + bst r17,6 + bld r12,1 + bst r17,7 + bld r22,6 + ldd r17,Y+2 + bst r17,0 + bld r25,4 + bst r17,1 + bld r4,1 + bst r17,3 + bld r9,4 + bst r17,4 + bld r20,1 + bst r17,5 + bld r14,7 + bst r17,6 + bld r27,4 + ldd r17,Y+3 + bst r17,0 + bld r6,7 + bst r17,2 + bld r12,2 + bst r17,3 + bld r22,7 + bst r17,4 + bld r25,5 + bst r17,5 + bld r4,2 + bst r17,7 + bld r9,5 + ldd r17,Y+4 + bst r17,0 + bld r20,2 + bst r17,1 + bld r15,0 + bst r17,2 + bld r27,5 + bst r17,4 + bld r7,0 + bst r17,6 + bld r12,3 + bst r17,7 + bld r23,0 + ldd r17,Y+5 + bst r17,0 + bld r25,6 + bst r17,1 + bld r4,3 + bst r17,3 + bld r9,6 + bst r17,4 + bld r20,3 + bst r17,5 + bld r15,1 + bst r17,6 + bld r27,6 + ldd r17,Y+6 + bst r17,0 + bld r7,1 + bst r17,2 + bld r12,4 + bst r17,3 + bld r23,1 + bst r17,4 + bld r25,7 + bst r17,5 + bld r4,4 + bst r17,7 + bld r9,7 + ldd r17,Y+7 + bst r17,0 + bld r20,4 + bst r17,1 + bld r15,2 + bst r17,2 + bld r27,7 + bst r17,4 + bld r7,2 + bst r17,6 + bld r12,5 + bst r17,7 + bld r23,2 + ldd r17,Y+8 + bst r17,0 + bld r16,0 + bst r17,1 + bld r4,5 + bst r17,3 + bld r10,0 + bst r17,4 + bld r20,5 + bst r17,5 + bld r15,3 + bst r17,6 + bld r2,0 + ldd r17,Y+9 + bst r17,0 + bld r7,3 + bst r17,2 + bld r12,6 + bst r17,3 + bld r23,3 + bst r17,4 + bld r16,1 + bst r17,5 + bld r4,6 + bst r17,7 + bld r10,1 + ldd r17,Y+10 + bst r17,0 + bld r20,6 + bst r17,1 + bld r15,4 + bst r17,2 + bld r2,1 + bst r17,4 + bld r7,4 + bst r17,6 + bld r12,7 + bst r17,7 + bld r23,4 + ldd r17,Y+11 + bst r17,0 + bld r16,2 + bst r17,1 + bld r4,7 + bst r17,3 + bld r10,2 + bst r17,4 + bld r20,7 + bst r17,5 + bld r15,5 + bst r17,6 + bld r2,2 + ldd r17,Y+12 + bst r17,0 + bld r7,5 + bst r17,2 + bld r13,0 + bst r17,3 + bld r23,5 + bst r17,4 + bld r16,3 + bst r17,5 + bld r5,0 + bst r17,7 + bld r10,3 + ldd r17,Y+13 + bst r17,0 + bld r21,0 + bst r17,1 + bld r15,6 + bst r17,2 + bld r2,3 + bst r17,4 + bld r7,6 + bst r17,6 + bld r13,1 + bst r17,7 + bld r23,6 + ldd r17,Y+14 + bst r17,0 + bld r16,4 + bst r17,1 + bld r5,1 + bst r17,3 + bld r10,4 + bst r17,4 + bld r21,1 + bst r17,5 + bld r15,7 + bst r17,6 + bld r2,4 + ldd r17,Y+15 + bst r17,0 + bld r7,7 + bst r17,2 + bld r13,2 + bst r17,3 + bld r23,7 + bst r17,4 + bld r16,5 + bst r17,5 + bld r5,2 + bst r17,7 + bld r10,5 + ldd r17,Y+16 + bst r17,0 + bld r21,2 + bst r17,1 + bld r24,0 + bst r17,2 + bld r2,5 + bst r17,4 + bld r8,0 + bst r17,6 + bld r13,3 + bst r17,7 + bld r26,0 + ldd r17,Y+17 + bst r17,0 + bld r16,6 + bst r17,1 + bld r5,3 + bst r17,3 + bld r10,6 + bst r17,4 + bld r21,3 + bst r17,5 + bld r24,1 + bst r17,6 + bld r2,6 + ldd r17,Y+18 + bst r17,0 + bld r8,1 + bst r17,2 + bld r13,4 + bst r17,3 + bld r26,1 + bst r17,4 + bld r16,7 + bst r17,5 + bld r5,4 + bst r17,7 + bld r10,7 + ldd r17,Y+19 + bst r17,0 + bld r21,4 + bst r17,1 + bld r24,2 + bst r17,2 + bld r2,7 + bst r17,4 + bld r8,2 + bst r17,6 + bld r13,5 + bst r17,7 + bld r26,2 + ldd r17,Y+20 + 
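+ ; PI: state bit i moves to bit (12 * i) mod 257; each bst/bld pair below
+ ; routes one bit of the just-loaded source byte to its permuted position.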
bst r17,1 + bld r5,5 + bst r17,3 + bld r11,0 + bst r17,4 + bld r21,5 + bst r17,5 + bld r24,3 + bst r17,6 + bld r3,0 + ldd r17,Y+21 + bst r17,0 + bld r8,3 + bst r17,2 + bld r13,6 + bst r17,3 + bld r26,3 + bst r17,5 + bld r5,6 + bst r17,7 + bld r11,1 + ldd r17,Y+22 + bst r17,0 + bld r21,6 + bst r17,1 + bld r24,4 + bst r17,2 + bld r3,1 + bst r17,4 + bld r8,4 + bst r17,6 + bld r13,7 + bst r17,7 + bld r26,4 + ldd r17,Y+23 + bst r17,1 + bld r5,7 + bst r17,3 + bld r11,2 + bst r17,4 + bld r21,7 + bst r17,5 + bld r24,5 + bst r17,6 + bld r3,2 + ldd r17,Y+24 + bst r17,0 + bld r8,5 + bst r17,2 + bld r14,0 + bst r17,3 + bld r26,5 + bst r17,5 + bld r6,0 + bst r17,7 + bld r11,3 + ldd r17,Y+25 + bst r17,0 + bld r22,0 + bst r17,1 + bld r24,6 + bst r17,2 + bld r3,3 + bst r17,4 + bld r8,6 + bst r17,6 + bld r14,1 + bst r17,7 + bld r26,6 + ldd r17,Y+26 + bst r17,1 + bld r6,1 + bst r17,3 + bld r11,4 + bst r17,4 + bld r22,1 + bst r17,5 + bld r24,7 + bst r17,6 + bld r3,4 + ldd r17,Y+27 + bst r17,0 + bld r8,7 + bst r17,2 + bld r14,2 + bst r17,3 + bld r26,7 + bst r17,5 + bld r6,2 + bst r17,7 + bld r11,5 + ldd r17,Y+28 + bst r17,0 + bld r22,2 + bst r17,1 + bld r25,0 + bst r17,2 + bld r3,5 + bst r17,4 + bld r9,0 + bst r17,6 + bld r14,3 + bst r17,7 + bld r27,0 + ldd r17,Y+29 + bst r17,1 + bld r6,3 + bst r17,3 + bld r11,6 + bst r17,4 + bld r22,3 + bst r17,5 + bld r25,1 + bst r17,6 + bld r3,6 + ldd r17,Y+30 + bst r17,0 + bld r9,1 + bst r17,2 + bld r14,4 + bst r17,3 + bld r27,1 + bst r17,5 + bld r6,4 + bst r17,7 + bld r11,7 + ldd r17,Y+31 + bst r17,0 + bld r22,4 + bst r17,1 + bld r25,2 + bst r17,2 + bld r3,7 + bst r17,4 + bld r9,2 + bst r17,6 + bld r14,5 + bst r17,7 + bld r27,2 + ldd r17,Y+32 + bst r17,1 + bld r6,5 + bst r17,3 + bld r12,0 + bst r17,4 + bld r22,5 + bst r17,5 + bld r25,3 + bst r17,6 + bld r4,0 + bst r18,0 + bld r9,3 + st Z,r20 + std Z+1,r21 + std Z+2,r22 + std Z+3,r23 + std Z+4,r26 + std Z+5,r27 + std Z+6,r2 + std Z+7,r3 + std Z+8,r4 + std Z+9,r5 + std Z+10,r6 + std Z+11,r7 + std Z+12,r8 + std Z+13,r9 + std Z+14,r10 + std Z+15,r11 + std Z+16,r12 + std Z+17,r13 + std Z+18,r14 + std Z+19,r15 + std Z+20,r24 + std Z+21,r25 + std Z+22,r16 + mov r5,r1 + ldd r17,Y+1 + bst r17,3 + bld r21,1 + bst r17,5 + bld r2,4 + ldd r17,Y+2 + bst r17,2 + bld r23,7 + bst r17,7 + bld r21,2 + ldd r17,Y+3 + bst r17,1 + bld r2,5 + bst r17,6 + bld r26,0 + ldd r17,Y+4 + bst r17,3 + bld r21,3 + bst r17,5 + bld r2,6 + ldd r17,Y+5 + bst r17,2 + bld r26,1 + bst r17,7 + bld r21,4 + ldd r17,Y+6 + bst r17,1 + bld r2,7 + bst r17,6 + bld r26,2 + ldd r17,Y+7 + bst r17,3 + bld r21,5 + bst r17,5 + bld r3,0 + ldd r17,Y+8 + bst r17,2 + bld r26,3 + bst r17,7 + bld r21,6 + ldd r17,Y+9 + bst r17,1 + bld r3,1 + bst r17,6 + bld r26,4 + ldd r17,Y+10 + bst r17,3 + bld r21,7 + bst r17,5 + bld r3,2 + ldd r17,Y+11 + bst r17,2 + bld r26,5 + bst r17,7 + bld r22,0 + ldd r17,Y+12 + bst r17,1 + bld r3,3 + bst r17,6 + bld r26,6 + ldd r17,Y+13 + bst r17,3 + bld r22,1 + bst r17,5 + bld r3,4 + ldd r17,Y+14 + bst r17,2 + bld r26,7 + bst r17,7 + bld r22,2 + ldd r17,Y+15 + bst r17,1 + bld r3,5 + bst r17,6 + bld r27,0 + ldd r17,Y+16 + bst r17,3 + bld r22,3 + bst r17,5 + bld r3,6 + ldd r17,Y+17 + bst r17,2 + bld r27,1 + bst r17,7 + bld r22,4 + ldd r17,Y+18 + bst r17,1 + bld r3,7 + bst r17,6 + bld r27,2 + ldd r17,Y+19 + bst r17,3 + bld r22,5 + bst r17,5 + bld r4,0 + ldd r17,Y+20 + bst r17,0 + bld r20,0 + bst r17,2 + bld r27,3 + bst r17,7 + bld r22,6 + ldd r17,Y+21 + bst r17,1 + bld r4,1 + bst r17,4 + bld r20,1 + bst r17,6 + bld r27,4 + ldd r17,Y+22 + bst r17,3 + bld r22,7 
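+ ; (still the second PI gathering pass: these bst/bld pairs collect the
+ ; bits destined for state output bytes Z+23..Z+32)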
+ bst r17,5 + bld r4,2 + ldd r17,Y+23 + bst r17,0 + bld r20,2 + bst r17,2 + bld r27,5 + bst r17,7 + bld r23,0 + ldd r17,Y+24 + bst r17,1 + bld r4,3 + bst r17,4 + bld r20,3 + bst r17,6 + bld r27,6 + ldd r17,Y+25 + bst r17,3 + bld r23,1 + bst r17,5 + bld r4,4 + ldd r17,Y+26 + bst r17,0 + bld r20,4 + bst r17,2 + bld r27,7 + bst r17,7 + bld r23,2 + ldd r17,Y+27 + bst r17,1 + bld r4,5 + bst r17,4 + bld r20,5 + bst r17,6 + bld r2,0 + ldd r17,Y+28 + bst r17,3 + bld r23,3 + bst r17,5 + bld r4,6 + ldd r17,Y+29 + bst r17,0 + bld r20,6 + bst r17,2 + bld r2,1 + bst r17,7 + bld r23,4 + ldd r17,Y+30 + bst r17,1 + bld r4,7 + bst r17,4 + bld r20,7 + bst r17,6 + bld r2,2 + ldd r17,Y+31 + bst r17,3 + bld r23,5 + bst r17,5 + bld r5,0 + ldd r17,Y+32 + bst r17,0 + bld r21,0 + bst r17,2 + bld r2,3 + bst r17,7 + bld r23,6 + std Z+23,r20 + std Z+24,r21 + std Z+25,r22 + std Z+26,r23 + std Z+27,r26 + std Z+28,r27 + std Z+29,r2 + std Z+30,r3 + std Z+31,r4 + std Z+32,r5 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size subterranean_round, .-subterranean_round + + .text +.global subterranean_absorb_1 + .type subterranean_absorb_1, @function +subterranean_absorb_1: + movw r30,r24 +.L__stack_usage = 2 + ldi r23,1 + mov r18,r1 + bst r22,0 + bld r18,1 + ld r0,Z + eor r0,r18 + st Z,r0 + mov r18,r1 + bst r22,3 + bld r18,3 + ldd r0,Z+4 + eor r0,r18 + std Z+4,r0 + mov r18,r1 + bst r23,0 + bld r18,0 + ldd r0,Z+8 + eor r0,r18 + std Z+8,r0 + mov r18,r1 + bst r22,5 + bld r18,6 + ldd r0,Z+16 + eor r0,r18 + std Z+16,r0 + mov r18,r1 + bst r22,2 + bld r18,0 + ldd r0,Z+17 + eor r0,r18 + std Z+17,r0 + mov r18,r1 + bst r22,1 + bld r18,0 + ldd r0,Z+22 + eor r0,r18 + std Z+22,r0 + mov r18,r1 + bst r22,6 + bld r18,5 + ldd r0,Z+24 + eor r0,r18 + std Z+24,r0 + mov r18,r1 + bst r22,7 + bld r18,2 + ldd r0,Z+29 + eor r0,r18 + std Z+29,r0 + mov r18,r1 + bst r22,4 + bld r18,1 + ldd r0,Z+31 + eor r0,r18 + std Z+31,r0 + ret + .size subterranean_absorb_1, .-subterranean_absorb_1 + + .text +.global subterranean_absorb_word + .type subterranean_absorb_word, @function +subterranean_absorb_word: + movw r30,r24 +.L__stack_usage = 2 + mov r18,r1 + bst r20,0 + bld r18,1 + bst r21,4 + bld r18,2 + bst r23,0 + bld r18,4 + ld r0,Z + eor r0,r18 + st Z,r0 + mov r18,r1 + bst r22,1 + bld r18,3 + bst r21,6 + bld r18,7 + ldd r0,Z+1 + eor r0,r18 + std Z+1,r0 + mov r18,r1 + bst r23,6 + bld r18,1 + bst r23,5 + bld r18,6 + ldd r0,Z+2 + eor r0,r18 + std Z+2,r0 + mov r18,r1 + bst r23,2 + bld r18,6 + ldd r0,Z+3 + eor r0,r18 + std Z+3,r0 + mov r18,r1 + bst r20,3 + bld r18,3 + ldd r0,Z+4 + eor r0,r18 + std Z+4,r0 + mov r18,r1 + bst r21,0 + bld r18,0 + bst r21,7 + bld r18,6 + ldd r0,Z+8 + eor r0,r18 + std Z+8,r0 + mov r18,r1 + bst r21,5 + bld r18,7 + ldd r0,Z+11 + eor r0,r18 + std Z+11,r0 + mov r18,r1 + bst r22,7 + bld r18,7 + ldd r0,Z+13 + eor r0,r18 + std Z+13,r0 + mov r18,r1 + bst r22,4 + bld r18,0 + bst r20,5 + bld r18,6 + ldd r0,Z+16 + eor r0,r18 + std Z+16,r0 + mov r18,r1 + bst r20,2 + bld r18,0 + bst r22,2 + bld r18,1 + bst r23,3 + bld r18,4 + ldd r0,Z+17 + eor r0,r18 + std Z+17,r0 + mov r18,r1 + bst r23,7 + bld r18,5 + ldd r0,Z+20 + eor r0,r18 + std Z+20,r0 + mov r18,r1 + bst r22,5 + bld r18,1 + ldd r0,Z+21 + eor r0,r18 + std Z+21,r0 + mov r18,r1 + bst r20,1 + bld r18,0 + ldd r0,Z+22 + eor r0,r18 + std Z+22,r0 + mov r18,r1 + bst r21,3 + bld r18,0 + bst r22,6 
+ bld r18,5 + bst r23,1 + bld r18,6 + ldd r0,Z+23 + eor r0,r18 + std Z+23,r0 + mov r18,r1 + bst r20,6 + bld r18,5 + ldd r0,Z+24 + eor r0,r18 + std Z+24,r0 + mov r18,r1 + bst r22,3 + bld r18,3 + bst r21,1 + bld r18,5 + ldd r0,Z+26 + eor r0,r18 + std Z+26,r0 + mov r18,r1 + bst r21,2 + bld r18,7 + ldd r0,Z+27 + eor r0,r18 + std Z+27,r0 + mov r18,r1 + bst r23,4 + bld r18,1 + ldd r0,Z+28 + eor r0,r18 + std Z+28,r0 + mov r18,r1 + bst r20,7 + bld r18,2 + ldd r0,Z+29 + eor r0,r18 + std Z+29,r0 + mov r18,r1 + bst r22,0 + bld r18,1 + ldd r0,Z+30 + eor r0,r18 + std Z+30,r0 + mov r18,r1 + bst r20,4 + bld r18,1 + ldd r0,Z+31 + eor r0,r18 + std Z+31,r0 + ret + .size subterranean_absorb_word, .-subterranean_absorb_word + + .text +.global subterranean_extract + .type subterranean_extract, @function +subterranean_extract: + movw r30,r24 +.L__stack_usage = 2 + ld r26,Z + bst r26,1 + bld r22,0 + bst r26,2 + bld r23,4 + bst r26,4 + bld r25,0 + ldd r26,Z+1 + bst r26,0 + bld r18,4 + bst r26,3 + bld r24,1 + bst r26,7 + bld r23,6 + ldd r26,Z+2 + bst r26,0 + bld r20,0 + bst r26,1 + bld r25,6 + bst r26,6 + bld r25,5 + bst r26,7 + bld r18,7 + ldd r26,Z+3 + bst r26,6 + bld r25,2 + ldd r26,Z+4 + bst r26,0 + bld r21,4 + bst r26,2 + bld r19,2 + bst r26,3 + bld r22,3 + ldd r26,Z+5 + bst r26,4 + bld r19,1 + bst r26,6 + bld r20,3 + ldd r26,Z+7 + bst r26,4 + bld r18,6 + ldd r26,Z+8 + bst r26,0 + bld r23,0 + bst r26,3 + bld r21,1 + bst r26,4 + bld r20,6 + bst r26,6 + bld r23,7 + ldd r26,Z+9 + bst r26,1 + bld r19,3 + ldd r26,Z+10 + bst r26,1 + bld r18,1 + ldd r26,Z+11 + bst r26,0 + bld r20,5 + bst r26,4 + bld r21,7 + bst r26,7 + bld r23,5 + ldd r26,Z+13 + bst r26,7 + bld r24,7 + ldd r26,Z+14 + bst r26,5 + bld r21,3 + ldd r26,Z+15 + bst r26,0 + bld r20,2 + bst r26,1 + bld r18,2 + bst r26,3 + bld r18,5 + ldd r26,Z+16 + bst r26,0 + bld r24,4 + bst r26,1 + bld r20,4 + bst r26,6 + bld r22,5 + ldd r26,Z+17 + bst r26,0 + bld r22,2 + bst r26,1 + bld r24,2 + bst r26,4 + bld r25,3 + ldd r26,Z+18 + bst r26,2 + bld r20,7 + ldd r26,Z+20 + bst r26,2 + bld r19,5 + bst r26,5 + bld r25,7 + ldd r26,Z+21 + bst r26,1 + bld r24,5 + ldd r26,Z+22 + bst r26,0 + bld r22,1 + ldd r26,Z+23 + bst r26,0 + bld r23,3 + bst r26,3 + bld r19,7 + bst r26,5 + bld r24,6 + bst r26,6 + bld r25,1 + ldd r26,Z+24 + bst r26,1 + bld r19,0 + bst r26,5 + bld r22,6 + ldd r26,Z+26 + bst r26,3 + bld r24,3 + bst r26,5 + bld r23,1 + ldd r26,Z+27 + bst r26,6 + bld r18,3 + bst r26,7 + bld r23,2 + ldd r26,Z+28 + bst r26,1 + bld r25,4 + bst r26,3 + bld r21,2 + ldd r26,Z+29 + bst r26,2 + bld r22,7 + bst r26,3 + bld r21,5 + ldd r26,Z+30 + bst r26,0 + bld r21,6 + bst r26,1 + bld r24,0 + bst r26,2 + bld r19,6 + bst r26,6 + bld r20,1 + ldd r26,Z+31 + bst r26,1 + bld r22,4 + bst r26,5 + bld r21,0 + bst r26,7 + bld r19,4 + ldd r26,Z+32 + bst r26,0 + bld r18,0 + eor r22,r18 + eor r23,r19 + eor r24,r20 + eor r25,r21 + ret + .size subterranean_extract, .-subterranean_extract + +#endif diff --git a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.c b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.c index 1cb64e2..71b1c4c 100644 --- a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.c +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.c @@ -23,6 +23,8 @@ #include "internal-subterranean.h" #include +#if !defined(__AVR__) + void subterranean_round(subterranean_state_t *state) { uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8; @@ -168,28 +170,10 @@ void 
subterranean_round(subterranean_state_t *state) state->x[8] = BDN(x7, 21, 0); } -void subterranean_blank(subterranean_state_t *state) -{ - unsigned round; - for (round = 0; round < 8; ++round) { - subterranean_round(state); - state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ - } -} - -void subterranean_duplex_0(subterranean_state_t *state) -{ - subterranean_round(state); - state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ -} - -void subterranean_duplex_1(subterranean_state_t *state, unsigned char data) +void subterranean_absorb_1(subterranean_state_t *state, unsigned char data) { uint32_t x = data; - /* Perform a single Subterranean round before absorbing the bits */ - subterranean_round(state); - /* Rearrange the bits and absorb them into the state */ state->x[0] ^= (x << 1) & 0x00000002U; state->x[1] ^= x & 0x00000008U; @@ -200,13 +184,10 @@ void subterranean_duplex_1(subterranean_state_t *state, unsigned char data) state->x[7] ^= ((x << 21) & 0x02000000U) ^ ((x << 3) & 0x00000400U); } -void subterranean_duplex_word(subterranean_state_t *state, uint32_t x) +void subterranean_absorb_word(subterranean_state_t *state, uint32_t x) { uint32_t y; - /* Perform a single Subterranean round before absorbing the bits */ - subterranean_round(state); - /* To absorb the word into the state, we first rearrange the source * bits to be in the right target bit positions. Then we mask and * XOR them into the relevant words of the state. @@ -258,39 +239,6 @@ void subterranean_duplex_word(subterranean_state_t *state, uint32_t x) state->x[7] ^= (y & 0x02000400U) ^ (x & 0x00020002U); } -void subterranean_duplex_n - (subterranean_state_t *state, const unsigned char *data, unsigned len) -{ - switch (len) { - case 0: - subterranean_duplex_0(state); - break; - case 1: - subterranean_duplex_1(state, data[0]); - break; - case 2: - /* Load 16 bits and add the padding bit to the 17th bit */ - subterranean_duplex_word - (state, ((uint32_t)(data[0]) | - (((uint32_t)(data[1])) << 8) | - 0x10000U)); - break; - case 3: - /* Load 24 bits and add the padding bit to the 25th bit */ - subterranean_duplex_word - (state, ((uint32_t)(data[0]) | - (((uint32_t)(data[1])) << 8) | - (((uint32_t)(data[2])) << 16) | - 0x01000000U)); - break; - default: - /* Load 32 bits and add the padding bit to the 33rd bit */ - subterranean_duplex_word(state, le_load_word32(data)); - state->x[8] ^= 0x00000001U; - break; - } -} - uint32_t subterranean_extract(subterranean_state_t *state) { uint32_t x, y; @@ -399,12 +347,57 @@ uint32_t subterranean_extract(subterranean_state_t *state) return y ^ state->x[8]; } +#endif /* !__AVR__ */ + +void subterranean_blank(subterranean_state_t *state) +{ + unsigned round; + for (round = 0; round < 8; ++round) { + subterranean_round(state); + state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ + } +} + +void subterranean_duplex_n + (subterranean_state_t *state, const unsigned char *data, unsigned len) +{ + subterranean_round(state); + switch (len) { + case 0: + state->x[0] ^= 0x02; /* padding for an empty block */ + break; + case 1: + subterranean_absorb_1(state, data[0]); + break; + case 2: + /* Load 16 bits and add the padding bit to the 17th bit */ + subterranean_absorb_word + (state, ((uint32_t)(data[0]) | + (((uint32_t)(data[1])) << 8) | + 0x10000U)); + break; + case 3: + /* Load 24 bits and add the padding bit to the 25th bit */ + subterranean_absorb_word + (state, ((uint32_t)(data[0]) | + (((uint32_t)(data[1])) << 8) | + (((uint32_t)(data[2])) << 
16) | + 0x01000000U)); + break; + default: + /* Load 32 bits and add the padding bit to the 33rd bit */ + subterranean_absorb_word(state, le_load_word32(data)); + state->x[8] ^= 0x00000001U; + break; + } +} + void subterranean_absorb (subterranean_state_t *state, const unsigned char *data, unsigned long long len) { while (len >= 4) { - subterranean_duplex_4(state, data); + subterranean_duplex_4(state, le_load_word32(data)); data += 4; len -= 4; } diff --git a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.h b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.h index 71cebb2..8ebbd30 100644 --- a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.h +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/internal-subterranean.h @@ -28,6 +28,8 @@ /** * \file internal-subterranean.h * \brief Internal implementation of the Subterranean block operation. + * + * References: https://cs.ru.nl/~joan/subterranean.html */ #ifdef __cplusplus @@ -66,7 +68,19 @@ void subterranean_blank(subterranean_state_t *state); * * \param state Subterranean state to be transformed. */ -void subterranean_duplex_0(subterranean_state_t *state); +#define subterranean_duplex_0(state) \ + do { \ + subterranean_round((state)); \ + (state)->x[0] ^= 2; /* padding for an empty block */ \ + } while (0) + +/** + * \brief Absorbs a single byte into the Subterranean state. + * + * \param state Subterranean state to be transformed. + * \param data The single byte to be absorbed. + */ +void subterranean_absorb_1(subterranean_state_t *state, unsigned char data); /** * \brief Performs a single Subterranean round and absorbs one byte. @@ -74,7 +88,11 @@ void subterranean_duplex_0(subterranean_state_t *state); * \param state Subterranean state to be transformed. * \param data The single byte to be absorbed. */ -void subterranean_duplex_1(subterranean_state_t *state, unsigned char data); +#define subterranean_duplex_1(state, data) \ + do { \ + subterranean_round((state)); \ + subterranean_absorb_1((state), (data)); \ + } while (0) /** * \brief Absorbs a 32-bit word into the Subterranean state. @@ -82,17 +100,30 @@ void subterranean_duplex_1(subterranean_state_t *state, unsigned char data); * \param state Subterranean state to be transformed. * \param x The word to absorb into the state. */ -void subterranean_duplex_word(subterranean_state_t *state, uint32_t x); +void subterranean_absorb_word(subterranean_state_t *state, uint32_t x); + +/** + * \brief Absorbs a 32-bit word into the Subterranean state after performing + * the round function. + * + * \param state Subterranean state to be transformed. + * \param x The word to absorb into the state. + */ +#define subterranean_duplex_word(state, x) \ + do { \ + subterranean_round((state)); \ + subterranean_absorb_word((state), (x)); \ + } while (0) /** * \brief Performs a single Subterranean round and absorbs four bytes. * * \param state Subterranean state to be transformed. - * \param data Points to the four data bytes to be absorbed. + * \param data 32-bit word containing the four data bytes to be absorbed. 
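+ *
+ * Equivalent to subterranean_duplex_word() on \a data followed by XORing
+ * the 33rd (padding) bit into bit 0 of state word 8.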
*/ #define subterranean_duplex_4(state, data) \ do { \ - subterranean_duplex_word((state), le_load_word32((data))); \ + subterranean_duplex_word((state), (data)); \ (state)->x[8] ^= 1; \ } while (0) diff --git a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.c b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.c index 1bc9fc4..aad147a 100644 --- a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.c +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.c @@ -75,8 +75,7 @@ int subterranean_aead_encrypt while (mlen >= 4) { x1 = le_load_word32(m); x2 = subterranean_extract(&state) ^ x1; - subterranean_duplex_word(&state, x1); - state.x[8] ^= 1; /* padding for 32-bit blocks */ + subterranean_duplex_4(&state, x1); le_store_word32(c, x2); c += 4; m += 4; @@ -146,8 +145,7 @@ int subterranean_aead_decrypt while (clen >= 4) { x = le_load_word32(c); x ^= subterranean_extract(&state); - subterranean_duplex_word(&state, x); - state.x[8] ^= 1; /* padding for 32-bit blocks */ + subterranean_duplex_4(&state, x); le_store_word32(m, x); c += 4; m += 4; diff --git a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.h b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.h index 148e5e8..3b35b42 100644 --- a/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.h +++ b/subterranean/Implementations/crypto_aead/subterraneanv1/rhys/subterranean.h @@ -38,6 +38,8 @@ * * The Subterranean permutation is intended for hardware implementation. * It is not structured for efficient software implementation. + * + * References: https://cs.ru.nl/~joan/subterranean.html */ #ifdef __cplusplus diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean-avr.S b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean-avr.S new file mode 100644 index 0000000..6380870 --- /dev/null +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean-avr.S @@ -0,0 +1,1632 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .text +.global subterranean_round + .type subterranean_round, @function +subterranean_round: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 50 + ld r20,Z + ldd r21,Z+1 + ldd r22,Z+2 + ldd r23,Z+3 + ldd r26,Z+4 + ldd r27,Z+5 + ldd r2,Z+6 + ldd r3,Z+7 + mov r18,r20 + lsl r18 + ldd r0,Z+32 + eor r18,r0 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + ldi r25,1 + eor r20,r25 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + std Y+4,r23 + std Y+5,r26 + std Y+6,r27 + std Y+7,r2 + mov r20,r3 + ldd r21,Z+8 + ldd r22,Z+9 + ldd r23,Z+10 + ldd r26,Z+11 + ldd r27,Z+12 + ldd r2,Z+13 + ldd r3,Z+14 + movw r4,r20 + movw 
r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+8,r20 + std Y+9,r21 + std Y+10,r22 + std Y+11,r23 + std Y+12,r26 + std Y+13,r27 + std Y+14,r2 + mov r20,r3 + ldd r21,Z+15 + ldd r22,Z+16 + ldd r23,Z+17 + ldd r26,Z+18 + ldd r27,Z+19 + ldd r2,Z+20 + ldd r3,Z+21 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+15,r20 + std Y+16,r21 + std Y+17,r22 + std Y+18,r23 + std Y+19,r26 + std Y+20,r27 + std Y+21,r2 + mov r20,r3 + ldd r21,Z+22 + ldd r22,Z+23 + ldd r23,Z+24 + ldd r26,Z+25 + ldd r27,Z+26 + ldd r2,Z+27 + ldd r3,Z+28 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + movw r24,r8 + movw r16,r10 + lsr r17 + ror r16 + ror r25 + ror r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + com r8 + com r9 + com r10 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + and r8,r24 + and r9,r25 + and r10,r16 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+22,r20 + std Y+23,r21 + std Y+24,r22 + std Y+25,r23 + std Y+26,r26 + std Y+27,r27 + std Y+28,r2 + mov r20,r3 + ldd r21,Z+29 + ldd r22,Z+30 + ldd r23,Z+31 + mov r26,r18 + movw r4,r20 + movw r6,r22 + mov r8,r26 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + movw r12,r4 + movw r14,r6 + mov r24,r8 + lsr r24 + ror r15 + ror r14 + ror r13 + ror r12 + com r4 + com r5 + com r6 + com r7 + and r4,r12 + and r5,r13 + and r6,r14 + and r7,r15 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + std Y+29,r20 + std Y+30,r21 + std Y+31,r22 + std Y+32,r23 + mov r20,r18 + lsr r20 + mov r21,r20 + lsr r21 + com r20 + and r20,r21 + eor r18,r20 + andi r18,1 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + ldd r23,Y+4 + ldd r26,Y+5 + ldd r27,Y+6 + ldd r2,Y+7 + ldd r3,Y+8 + movw r4,r20 + lsl r4 + rol r5 + eor r18,r4 + mov r19,r5 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + std Y+4,r23 + std Y+5,r26 + std Y+6,r27 + std Y+7,r2 + mov r20,r3 + ldd r21,Y+9 + ldd r22,Y+10 + ldd r23,Y+11 + ldd r26,Y+12 + ldd r27,Y+13 + ldd r2,Y+14 + ldd r3,Y+15 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror 
r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+8,r20 + std Y+9,r21 + std Y+10,r22 + std Y+11,r23 + std Y+12,r26 + std Y+13,r27 + std Y+14,r2 + mov r20,r3 + ldd r21,Y+16 + ldd r22,Y+17 + ldd r23,Y+18 + ldd r26,Y+19 + ldd r27,Y+20 + ldd r2,Y+21 + ldd r3,Y+22 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+15,r20 + std Y+16,r21 + std Y+17,r22 + std Y+18,r23 + std Y+19,r26 + std Y+20,r27 + std Y+21,r2 + mov r20,r3 + ldd r21,Y+23 + ldd r22,Y+24 + ldd r23,Y+25 + ldd r26,Y+26 + ldd r27,Y+27 + ldd r2,Y+28 + ldd r3,Y+29 + movw r4,r20 + movw r6,r22 + movw r8,r26 + movw r10,r2 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r11 + ror r10 + ror r9 + ror r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r26,r27 + eor r27,r2 + eor r2,r3 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + eor r26,r8 + eor r27,r9 + eor r2,r10 + std Y+22,r20 + std Y+23,r21 + std Y+24,r22 + std Y+25,r23 + std Y+26,r26 + std Y+27,r27 + std Y+28,r2 + mov r20,r3 + ldd r21,Y+30 + ldd r22,Y+31 + ldd r23,Y+32 + mov r26,r18 + movw r4,r20 + movw r6,r22 + mov r8,r26 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + lsr r8 + ror r7 + ror r6 + ror r5 + ror r4 + eor r20,r21 + eor r21,r22 + eor r22,r23 + eor r23,r26 + eor r20,r4 + eor r21,r5 + eor r22,r6 + eor r23,r7 + std Y+29,r20 + std Y+30,r21 + std Y+31,r22 + std Y+32,r23 + mov r20,r18 + lsr r20 + lsr r20 + lsr r20 + eor r18,r19 + eor r18,r20 + ldd r17,Y+1 + bst r17,0 + bld r20,0 + bst r17,1 + bld r14,6 + bst r17,2 + bld r27,3 + bst r17,4 + bld r6,6 + bst r17,6 + bld r12,1 + bst r17,7 + bld r22,6 + ldd r17,Y+2 + bst r17,0 + bld r25,4 + bst r17,1 + bld r4,1 + bst r17,3 + bld r9,4 + bst r17,4 + bld r20,1 + bst r17,5 + bld r14,7 + bst r17,6 + bld r27,4 + ldd r17,Y+3 + bst r17,0 + bld r6,7 + bst r17,2 + bld r12,2 + bst r17,3 + bld r22,7 + bst r17,4 + bld r25,5 + bst r17,5 + bld r4,2 + bst r17,7 + bld r9,5 + ldd r17,Y+4 + bst r17,0 + bld r20,2 + bst r17,1 + bld r15,0 + bst r17,2 + bld r27,5 + bst r17,4 + bld r7,0 + bst r17,6 + bld r12,3 + bst r17,7 + bld r23,0 + ldd r17,Y+5 + bst r17,0 + bld r25,6 + bst r17,1 + bld r4,3 + bst r17,3 + bld r9,6 + bst r17,4 + bld r20,3 + bst r17,5 + bld r15,1 + bst r17,6 + bld r27,6 + ldd r17,Y+6 + bst r17,0 + bld r7,1 + bst r17,2 + bld r12,4 + bst r17,3 + bld r23,1 + bst r17,4 + bld r25,7 + bst r17,5 + bld r4,4 + bst r17,7 + bld r9,7 + ldd r17,Y+7 + bst r17,0 + bld r20,4 + bst r17,1 + bld r15,2 + bst r17,2 + bld r27,7 + bst r17,4 + bld r7,2 + bst r17,6 + bld r12,5 + bst r17,7 + bld r23,2 + ldd r17,Y+8 + bst r17,0 + bld r16,0 + bst r17,1 + bld r4,5 + bst r17,3 + bld r10,0 + bst r17,4 + bld r20,5 + bst r17,5 + bld r15,3 + bst 
r17,6 + bld r2,0 + ldd r17,Y+9 + bst r17,0 + bld r7,3 + bst r17,2 + bld r12,6 + bst r17,3 + bld r23,3 + bst r17,4 + bld r16,1 + bst r17,5 + bld r4,6 + bst r17,7 + bld r10,1 + ldd r17,Y+10 + bst r17,0 + bld r20,6 + bst r17,1 + bld r15,4 + bst r17,2 + bld r2,1 + bst r17,4 + bld r7,4 + bst r17,6 + bld r12,7 + bst r17,7 + bld r23,4 + ldd r17,Y+11 + bst r17,0 + bld r16,2 + bst r17,1 + bld r4,7 + bst r17,3 + bld r10,2 + bst r17,4 + bld r20,7 + bst r17,5 + bld r15,5 + bst r17,6 + bld r2,2 + ldd r17,Y+12 + bst r17,0 + bld r7,5 + bst r17,2 + bld r13,0 + bst r17,3 + bld r23,5 + bst r17,4 + bld r16,3 + bst r17,5 + bld r5,0 + bst r17,7 + bld r10,3 + ldd r17,Y+13 + bst r17,0 + bld r21,0 + bst r17,1 + bld r15,6 + bst r17,2 + bld r2,3 + bst r17,4 + bld r7,6 + bst r17,6 + bld r13,1 + bst r17,7 + bld r23,6 + ldd r17,Y+14 + bst r17,0 + bld r16,4 + bst r17,1 + bld r5,1 + bst r17,3 + bld r10,4 + bst r17,4 + bld r21,1 + bst r17,5 + bld r15,7 + bst r17,6 + bld r2,4 + ldd r17,Y+15 + bst r17,0 + bld r7,7 + bst r17,2 + bld r13,2 + bst r17,3 + bld r23,7 + bst r17,4 + bld r16,5 + bst r17,5 + bld r5,2 + bst r17,7 + bld r10,5 + ldd r17,Y+16 + bst r17,0 + bld r21,2 + bst r17,1 + bld r24,0 + bst r17,2 + bld r2,5 + bst r17,4 + bld r8,0 + bst r17,6 + bld r13,3 + bst r17,7 + bld r26,0 + ldd r17,Y+17 + bst r17,0 + bld r16,6 + bst r17,1 + bld r5,3 + bst r17,3 + bld r10,6 + bst r17,4 + bld r21,3 + bst r17,5 + bld r24,1 + bst r17,6 + bld r2,6 + ldd r17,Y+18 + bst r17,0 + bld r8,1 + bst r17,2 + bld r13,4 + bst r17,3 + bld r26,1 + bst r17,4 + bld r16,7 + bst r17,5 + bld r5,4 + bst r17,7 + bld r10,7 + ldd r17,Y+19 + bst r17,0 + bld r21,4 + bst r17,1 + bld r24,2 + bst r17,2 + bld r2,7 + bst r17,4 + bld r8,2 + bst r17,6 + bld r13,5 + bst r17,7 + bld r26,2 + ldd r17,Y+20 + bst r17,1 + bld r5,5 + bst r17,3 + bld r11,0 + bst r17,4 + bld r21,5 + bst r17,5 + bld r24,3 + bst r17,6 + bld r3,0 + ldd r17,Y+21 + bst r17,0 + bld r8,3 + bst r17,2 + bld r13,6 + bst r17,3 + bld r26,3 + bst r17,5 + bld r5,6 + bst r17,7 + bld r11,1 + ldd r17,Y+22 + bst r17,0 + bld r21,6 + bst r17,1 + bld r24,4 + bst r17,2 + bld r3,1 + bst r17,4 + bld r8,4 + bst r17,6 + bld r13,7 + bst r17,7 + bld r26,4 + ldd r17,Y+23 + bst r17,1 + bld r5,7 + bst r17,3 + bld r11,2 + bst r17,4 + bld r21,7 + bst r17,5 + bld r24,5 + bst r17,6 + bld r3,2 + ldd r17,Y+24 + bst r17,0 + bld r8,5 + bst r17,2 + bld r14,0 + bst r17,3 + bld r26,5 + bst r17,5 + bld r6,0 + bst r17,7 + bld r11,3 + ldd r17,Y+25 + bst r17,0 + bld r22,0 + bst r17,1 + bld r24,6 + bst r17,2 + bld r3,3 + bst r17,4 + bld r8,6 + bst r17,6 + bld r14,1 + bst r17,7 + bld r26,6 + ldd r17,Y+26 + bst r17,1 + bld r6,1 + bst r17,3 + bld r11,4 + bst r17,4 + bld r22,1 + bst r17,5 + bld r24,7 + bst r17,6 + bld r3,4 + ldd r17,Y+27 + bst r17,0 + bld r8,7 + bst r17,2 + bld r14,2 + bst r17,3 + bld r26,7 + bst r17,5 + bld r6,2 + bst r17,7 + bld r11,5 + ldd r17,Y+28 + bst r17,0 + bld r22,2 + bst r17,1 + bld r25,0 + bst r17,2 + bld r3,5 + bst r17,4 + bld r9,0 + bst r17,6 + bld r14,3 + bst r17,7 + bld r27,0 + ldd r17,Y+29 + bst r17,1 + bld r6,3 + bst r17,3 + bld r11,6 + bst r17,4 + bld r22,3 + bst r17,5 + bld r25,1 + bst r17,6 + bld r3,6 + ldd r17,Y+30 + bst r17,0 + bld r9,1 + bst r17,2 + bld r14,4 + bst r17,3 + bld r27,1 + bst r17,5 + bld r6,4 + bst r17,7 + bld r11,7 + ldd r17,Y+31 + bst r17,0 + bld r22,4 + bst r17,1 + bld r25,2 + bst r17,2 + bld r3,7 + bst r17,4 + bld r9,2 + bst r17,6 + bld r14,5 + bst r17,7 + bld r27,2 + ldd r17,Y+32 + bst r17,1 + bld r6,5 + bst r17,3 + bld r12,0 + bst r17,4 + bld r22,5 + bst r17,5 + bld r25,3 + bst 
r17,6 + bld r4,0 + bst r18,0 + bld r9,3 + st Z,r20 + std Z+1,r21 + std Z+2,r22 + std Z+3,r23 + std Z+4,r26 + std Z+5,r27 + std Z+6,r2 + std Z+7,r3 + std Z+8,r4 + std Z+9,r5 + std Z+10,r6 + std Z+11,r7 + std Z+12,r8 + std Z+13,r9 + std Z+14,r10 + std Z+15,r11 + std Z+16,r12 + std Z+17,r13 + std Z+18,r14 + std Z+19,r15 + std Z+20,r24 + std Z+21,r25 + std Z+22,r16 + mov r5,r1 + ldd r17,Y+1 + bst r17,3 + bld r21,1 + bst r17,5 + bld r2,4 + ldd r17,Y+2 + bst r17,2 + bld r23,7 + bst r17,7 + bld r21,2 + ldd r17,Y+3 + bst r17,1 + bld r2,5 + bst r17,6 + bld r26,0 + ldd r17,Y+4 + bst r17,3 + bld r21,3 + bst r17,5 + bld r2,6 + ldd r17,Y+5 + bst r17,2 + bld r26,1 + bst r17,7 + bld r21,4 + ldd r17,Y+6 + bst r17,1 + bld r2,7 + bst r17,6 + bld r26,2 + ldd r17,Y+7 + bst r17,3 + bld r21,5 + bst r17,5 + bld r3,0 + ldd r17,Y+8 + bst r17,2 + bld r26,3 + bst r17,7 + bld r21,6 + ldd r17,Y+9 + bst r17,1 + bld r3,1 + bst r17,6 + bld r26,4 + ldd r17,Y+10 + bst r17,3 + bld r21,7 + bst r17,5 + bld r3,2 + ldd r17,Y+11 + bst r17,2 + bld r26,5 + bst r17,7 + bld r22,0 + ldd r17,Y+12 + bst r17,1 + bld r3,3 + bst r17,6 + bld r26,6 + ldd r17,Y+13 + bst r17,3 + bld r22,1 + bst r17,5 + bld r3,4 + ldd r17,Y+14 + bst r17,2 + bld r26,7 + bst r17,7 + bld r22,2 + ldd r17,Y+15 + bst r17,1 + bld r3,5 + bst r17,6 + bld r27,0 + ldd r17,Y+16 + bst r17,3 + bld r22,3 + bst r17,5 + bld r3,6 + ldd r17,Y+17 + bst r17,2 + bld r27,1 + bst r17,7 + bld r22,4 + ldd r17,Y+18 + bst r17,1 + bld r3,7 + bst r17,6 + bld r27,2 + ldd r17,Y+19 + bst r17,3 + bld r22,5 + bst r17,5 + bld r4,0 + ldd r17,Y+20 + bst r17,0 + bld r20,0 + bst r17,2 + bld r27,3 + bst r17,7 + bld r22,6 + ldd r17,Y+21 + bst r17,1 + bld r4,1 + bst r17,4 + bld r20,1 + bst r17,6 + bld r27,4 + ldd r17,Y+22 + bst r17,3 + bld r22,7 + bst r17,5 + bld r4,2 + ldd r17,Y+23 + bst r17,0 + bld r20,2 + bst r17,2 + bld r27,5 + bst r17,7 + bld r23,0 + ldd r17,Y+24 + bst r17,1 + bld r4,3 + bst r17,4 + bld r20,3 + bst r17,6 + bld r27,6 + ldd r17,Y+25 + bst r17,3 + bld r23,1 + bst r17,5 + bld r4,4 + ldd r17,Y+26 + bst r17,0 + bld r20,4 + bst r17,2 + bld r27,7 + bst r17,7 + bld r23,2 + ldd r17,Y+27 + bst r17,1 + bld r4,5 + bst r17,4 + bld r20,5 + bst r17,6 + bld r2,0 + ldd r17,Y+28 + bst r17,3 + bld r23,3 + bst r17,5 + bld r4,6 + ldd r17,Y+29 + bst r17,0 + bld r20,6 + bst r17,2 + bld r2,1 + bst r17,7 + bld r23,4 + ldd r17,Y+30 + bst r17,1 + bld r4,7 + bst r17,4 + bld r20,7 + bst r17,6 + bld r2,2 + ldd r17,Y+31 + bst r17,3 + bld r23,5 + bst r17,5 + bld r5,0 + ldd r17,Y+32 + bst r17,0 + bld r21,0 + bst r17,2 + bld r2,3 + bst r17,7 + bld r23,6 + std Z+23,r20 + std Z+24,r21 + std Z+25,r22 + std Z+26,r23 + std Z+27,r26 + std Z+28,r27 + std Z+29,r2 + std Z+30,r3 + std Z+31,r4 + std Z+32,r5 + adiw r28,32 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size subterranean_round, .-subterranean_round + + .text +.global subterranean_absorb_1 + .type subterranean_absorb_1, @function +subterranean_absorb_1: + movw r30,r24 +.L__stack_usage = 2 + ldi r23,1 + mov r18,r1 + bst r22,0 + bld r18,1 + ld r0,Z + eor r0,r18 + st Z,r0 + mov r18,r1 + bst r22,3 + bld r18,3 + ldd r0,Z+4 + eor r0,r18 + std Z+4,r0 + mov r18,r1 + bst r23,0 + bld r18,0 + ldd r0,Z+8 + eor r0,r18 + std Z+8,r0 + mov r18,r1 + bst r22,5 + bld r18,6 + ldd r0,Z+16 + eor r0,r18 + std Z+16,r0 + mov r18,r1 + bst r22,2 + bld r18,0 + ldd r0,Z+17 + eor r0,r18 + std 
Z+17,r0 + mov r18,r1 + bst r22,1 + bld r18,0 + ldd r0,Z+22 + eor r0,r18 + std Z+22,r0 + mov r18,r1 + bst r22,6 + bld r18,5 + ldd r0,Z+24 + eor r0,r18 + std Z+24,r0 + mov r18,r1 + bst r22,7 + bld r18,2 + ldd r0,Z+29 + eor r0,r18 + std Z+29,r0 + mov r18,r1 + bst r22,4 + bld r18,1 + ldd r0,Z+31 + eor r0,r18 + std Z+31,r0 + ret + .size subterranean_absorb_1, .-subterranean_absorb_1 + + .text +.global subterranean_absorb_word + .type subterranean_absorb_word, @function +subterranean_absorb_word: + movw r30,r24 +.L__stack_usage = 2 + mov r18,r1 + bst r20,0 + bld r18,1 + bst r21,4 + bld r18,2 + bst r23,0 + bld r18,4 + ld r0,Z + eor r0,r18 + st Z,r0 + mov r18,r1 + bst r22,1 + bld r18,3 + bst r21,6 + bld r18,7 + ldd r0,Z+1 + eor r0,r18 + std Z+1,r0 + mov r18,r1 + bst r23,6 + bld r18,1 + bst r23,5 + bld r18,6 + ldd r0,Z+2 + eor r0,r18 + std Z+2,r0 + mov r18,r1 + bst r23,2 + bld r18,6 + ldd r0,Z+3 + eor r0,r18 + std Z+3,r0 + mov r18,r1 + bst r20,3 + bld r18,3 + ldd r0,Z+4 + eor r0,r18 + std Z+4,r0 + mov r18,r1 + bst r21,0 + bld r18,0 + bst r21,7 + bld r18,6 + ldd r0,Z+8 + eor r0,r18 + std Z+8,r0 + mov r18,r1 + bst r21,5 + bld r18,7 + ldd r0,Z+11 + eor r0,r18 + std Z+11,r0 + mov r18,r1 + bst r22,7 + bld r18,7 + ldd r0,Z+13 + eor r0,r18 + std Z+13,r0 + mov r18,r1 + bst r22,4 + bld r18,0 + bst r20,5 + bld r18,6 + ldd r0,Z+16 + eor r0,r18 + std Z+16,r0 + mov r18,r1 + bst r20,2 + bld r18,0 + bst r22,2 + bld r18,1 + bst r23,3 + bld r18,4 + ldd r0,Z+17 + eor r0,r18 + std Z+17,r0 + mov r18,r1 + bst r23,7 + bld r18,5 + ldd r0,Z+20 + eor r0,r18 + std Z+20,r0 + mov r18,r1 + bst r22,5 + bld r18,1 + ldd r0,Z+21 + eor r0,r18 + std Z+21,r0 + mov r18,r1 + bst r20,1 + bld r18,0 + ldd r0,Z+22 + eor r0,r18 + std Z+22,r0 + mov r18,r1 + bst r21,3 + bld r18,0 + bst r22,6 + bld r18,5 + bst r23,1 + bld r18,6 + ldd r0,Z+23 + eor r0,r18 + std Z+23,r0 + mov r18,r1 + bst r20,6 + bld r18,5 + ldd r0,Z+24 + eor r0,r18 + std Z+24,r0 + mov r18,r1 + bst r22,3 + bld r18,3 + bst r21,1 + bld r18,5 + ldd r0,Z+26 + eor r0,r18 + std Z+26,r0 + mov r18,r1 + bst r21,2 + bld r18,7 + ldd r0,Z+27 + eor r0,r18 + std Z+27,r0 + mov r18,r1 + bst r23,4 + bld r18,1 + ldd r0,Z+28 + eor r0,r18 + std Z+28,r0 + mov r18,r1 + bst r20,7 + bld r18,2 + ldd r0,Z+29 + eor r0,r18 + std Z+29,r0 + mov r18,r1 + bst r22,0 + bld r18,1 + ldd r0,Z+30 + eor r0,r18 + std Z+30,r0 + mov r18,r1 + bst r20,4 + bld r18,1 + ldd r0,Z+31 + eor r0,r18 + std Z+31,r0 + ret + .size subterranean_absorb_word, .-subterranean_absorb_word + + .text +.global subterranean_extract + .type subterranean_extract, @function +subterranean_extract: + movw r30,r24 +.L__stack_usage = 2 + ld r26,Z + bst r26,1 + bld r22,0 + bst r26,2 + bld r23,4 + bst r26,4 + bld r25,0 + ldd r26,Z+1 + bst r26,0 + bld r18,4 + bst r26,3 + bld r24,1 + bst r26,7 + bld r23,6 + ldd r26,Z+2 + bst r26,0 + bld r20,0 + bst r26,1 + bld r25,6 + bst r26,6 + bld r25,5 + bst r26,7 + bld r18,7 + ldd r26,Z+3 + bst r26,6 + bld r25,2 + ldd r26,Z+4 + bst r26,0 + bld r21,4 + bst r26,2 + bld r19,2 + bst r26,3 + bld r22,3 + ldd r26,Z+5 + bst r26,4 + bld r19,1 + bst r26,6 + bld r20,3 + ldd r26,Z+7 + bst r26,4 + bld r18,6 + ldd r26,Z+8 + bst r26,0 + bld r23,0 + bst r26,3 + bld r21,1 + bst r26,4 + bld r20,6 + bst r26,6 + bld r23,7 + ldd r26,Z+9 + bst r26,1 + bld r19,3 + ldd r26,Z+10 + bst r26,1 + bld r18,1 + ldd r26,Z+11 + bst r26,0 + bld r20,5 + bst r26,4 + bld r21,7 + bst r26,7 + bld r23,5 + ldd r26,Z+13 + bst r26,7 + bld r24,7 + ldd r26,Z+14 + bst r26,5 + bld r21,3 + ldd r26,Z+15 + bst r26,0 + bld r20,2 + bst r26,1 + bld r18,2 + bst r26,3 + 
bld r18,5 + ldd r26,Z+16 + bst r26,0 + bld r24,4 + bst r26,1 + bld r20,4 + bst r26,6 + bld r22,5 + ldd r26,Z+17 + bst r26,0 + bld r22,2 + bst r26,1 + bld r24,2 + bst r26,4 + bld r25,3 + ldd r26,Z+18 + bst r26,2 + bld r20,7 + ldd r26,Z+20 + bst r26,2 + bld r19,5 + bst r26,5 + bld r25,7 + ldd r26,Z+21 + bst r26,1 + bld r24,5 + ldd r26,Z+22 + bst r26,0 + bld r22,1 + ldd r26,Z+23 + bst r26,0 + bld r23,3 + bst r26,3 + bld r19,7 + bst r26,5 + bld r24,6 + bst r26,6 + bld r25,1 + ldd r26,Z+24 + bst r26,1 + bld r19,0 + bst r26,5 + bld r22,6 + ldd r26,Z+26 + bst r26,3 + bld r24,3 + bst r26,5 + bld r23,1 + ldd r26,Z+27 + bst r26,6 + bld r18,3 + bst r26,7 + bld r23,2 + ldd r26,Z+28 + bst r26,1 + bld r25,4 + bst r26,3 + bld r21,2 + ldd r26,Z+29 + bst r26,2 + bld r22,7 + bst r26,3 + bld r21,5 + ldd r26,Z+30 + bst r26,0 + bld r21,6 + bst r26,1 + bld r24,0 + bst r26,2 + bld r19,6 + bst r26,6 + bld r20,1 + ldd r26,Z+31 + bst r26,1 + bld r22,4 + bst r26,5 + bld r21,0 + bst r26,7 + bld r19,4 + ldd r26,Z+32 + bst r26,0 + bld r18,0 + eor r22,r18 + eor r23,r19 + eor r24,r20 + eor r25,r21 + ret + .size subterranean_extract, .-subterranean_extract + +#endif diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.c b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.c index 1cb64e2..71b1c4c 100644 --- a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.c +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.c @@ -23,6 +23,8 @@ #include "internal-subterranean.h" #include +#if !defined(__AVR__) + void subterranean_round(subterranean_state_t *state) { uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8; @@ -168,28 +170,10 @@ void subterranean_round(subterranean_state_t *state) state->x[8] = BDN(x7, 21, 0); } -void subterranean_blank(subterranean_state_t *state) -{ - unsigned round; - for (round = 0; round < 8; ++round) { - subterranean_round(state); - state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ - } -} - -void subterranean_duplex_0(subterranean_state_t *state) -{ - subterranean_round(state); - state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ -} - -void subterranean_duplex_1(subterranean_state_t *state, unsigned char data) +void subterranean_absorb_1(subterranean_state_t *state, unsigned char data) { uint32_t x = data; - /* Perform a single Subterranean round before absorbing the bits */ - subterranean_round(state); - /* Rearrange the bits and absorb them into the state */ state->x[0] ^= (x << 1) & 0x00000002U; state->x[1] ^= x & 0x00000008U; @@ -200,13 +184,10 @@ void subterranean_duplex_1(subterranean_state_t *state, unsigned char data) state->x[7] ^= ((x << 21) & 0x02000000U) ^ ((x << 3) & 0x00000400U); } -void subterranean_duplex_word(subterranean_state_t *state, uint32_t x) +void subterranean_absorb_word(subterranean_state_t *state, uint32_t x) { uint32_t y; - /* Perform a single Subterranean round before absorbing the bits */ - subterranean_round(state); - /* To absorb the word into the state, we first rearrange the source * bits to be in the right target bit positions. Then we mask and * XOR them into the relevant words of the state. 
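For orientation: every duplex variant in these files follows one padding rule, namely run a single round, then absorb the len input bytes little-endian with a single 1 bit appended at input-bit position 8 * len. A minimal portable sketch of that rule follows; subterranean_pad is a hypothetical helper name for exposition and is not part of this patch.

    #include <stdint.h>

    /* Sketch only: build the padded word for a short block (0 <= len <= 3).
     * The patch absorbs the result via subterranean_absorb_word().  A full
     * 4-byte block keeps all 32 data bits and instead sets the 33rd padding
     * bit via state->x[8] ^= 1, exactly as subterranean_duplex_4() does. */
    static uint32_t subterranean_pad(const unsigned char *data, unsigned len)
    {
        uint32_t word = 0;
        unsigned i;
        for (i = 0; i < len; ++i)
            word |= ((uint32_t)data[i]) << (8 * i); /* little-endian load */
        return word | (1U << (8 * len));            /* padding bit */
    }

Under that reading, the len == 2 and len == 3 cases of subterranean_duplex_n() below are exactly subterranean_absorb_word(state, subterranean_pad(data, len)), and the len == 0 and len == 1 cases match the same rule through the shared absorption bit mapping.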
@@ -258,39 +239,6 @@ void subterranean_duplex_word(subterranean_state_t *state, uint32_t x) state->x[7] ^= (y & 0x02000400U) ^ (x & 0x00020002U); } -void subterranean_duplex_n - (subterranean_state_t *state, const unsigned char *data, unsigned len) -{ - switch (len) { - case 0: - subterranean_duplex_0(state); - break; - case 1: - subterranean_duplex_1(state, data[0]); - break; - case 2: - /* Load 16 bits and add the padding bit to the 17th bit */ - subterranean_duplex_word - (state, ((uint32_t)(data[0]) | - (((uint32_t)(data[1])) << 8) | - 0x10000U)); - break; - case 3: - /* Load 24 bits and add the padding bit to the 25th bit */ - subterranean_duplex_word - (state, ((uint32_t)(data[0]) | - (((uint32_t)(data[1])) << 8) | - (((uint32_t)(data[2])) << 16) | - 0x01000000U)); - break; - default: - /* Load 32 bits and add the padding bit to the 33rd bit */ - subterranean_duplex_word(state, le_load_word32(data)); - state->x[8] ^= 0x00000001U; - break; - } -} - uint32_t subterranean_extract(subterranean_state_t *state) { uint32_t x, y; @@ -399,12 +347,57 @@ uint32_t subterranean_extract(subterranean_state_t *state) return y ^ state->x[8]; } +#endif /* !__AVR__ */ + +void subterranean_blank(subterranean_state_t *state) +{ + unsigned round; + for (round = 0; round < 8; ++round) { + subterranean_round(state); + state->x[0] ^= 0x02; /* padding for an empty block is in state bit 1 */ + } +} + +void subterranean_duplex_n + (subterranean_state_t *state, const unsigned char *data, unsigned len) +{ + subterranean_round(state); + switch (len) { + case 0: + state->x[0] ^= 0x02; /* padding for an empty block */ + break; + case 1: + subterranean_absorb_1(state, data[0]); + break; + case 2: + /* Load 16 bits and add the padding bit to the 17th bit */ + subterranean_absorb_word + (state, ((uint32_t)(data[0]) | + (((uint32_t)(data[1])) << 8) | + 0x10000U)); + break; + case 3: + /* Load 24 bits and add the padding bit to the 25th bit */ + subterranean_absorb_word + (state, ((uint32_t)(data[0]) | + (((uint32_t)(data[1])) << 8) | + (((uint32_t)(data[2])) << 16) | + 0x01000000U)); + break; + default: + /* Load 32 bits and add the padding bit to the 33rd bit */ + subterranean_absorb_word(state, le_load_word32(data)); + state->x[8] ^= 0x00000001U; + break; + } +} + void subterranean_absorb (subterranean_state_t *state, const unsigned char *data, unsigned long long len) { while (len >= 4) { - subterranean_duplex_4(state, data); + subterranean_duplex_4(state, le_load_word32(data)); data += 4; len -= 4; } diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.h b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.h index 71cebb2..8ebbd30 100644 --- a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.h +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/internal-subterranean.h @@ -28,6 +28,8 @@ /** * \file internal-subterranean.h * \brief Internal implementation of the Subterranean block operation. + * + * References: https://cs.ru.nl/~joan/subterranean.html */ #ifdef __cplusplus @@ -66,7 +68,19 @@ void subterranean_blank(subterranean_state_t *state); * * \param state Subterranean state to be transformed. */ -void subterranean_duplex_0(subterranean_state_t *state); +#define subterranean_duplex_0(state) \ + do { \ + subterranean_round((state)); \ + (state)->x[0] ^= 2; /* padding for an empty block */ \ + } while (0) + +/** + * \brief Absorbs a single byte into the Subterranean state. 
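+ * The 1 padding bit that terminates an 8-bit block is absorbed together
+ * with the eight data bits, so no separate padding step is needed.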
+ * + * \param state Subterranean state to be transformed. + * \param data The single byte to be absorbed. + */ +void subterranean_absorb_1(subterranean_state_t *state, unsigned char data); /** * \brief Performs a single Subterranean round and absorbs one byte. @@ -74,7 +88,11 @@ void subterranean_duplex_0(subterranean_state_t *state); * \param state Subterranean state to be transformed. * \param data The single byte to be absorbed. */ -void subterranean_duplex_1(subterranean_state_t *state, unsigned char data); +#define subterranean_duplex_1(state, data) \ + do { \ + subterranean_round((state)); \ + subterranean_absorb_1((state), (data)); \ + } while (0) /** * \brief Absorbs a 32-bit word into the Subterranean state. @@ -82,17 +100,30 @@ void subterranean_duplex_1(subterranean_state_t *state, unsigned char data); * \param state Subterranean state to be transformed. * \param x The word to absorb into the state. */ -void subterranean_duplex_word(subterranean_state_t *state, uint32_t x); +void subterranean_absorb_word(subterranean_state_t *state, uint32_t x); + +/** + * \brief Absorbs a 32-bit word into the Subterranean state after performing + * the round function. + * + * \param state Subterranean state to be transformed. + * \param x The word to absorb into the state. + */ +#define subterranean_duplex_word(state, x) \ + do { \ + subterranean_round((state)); \ + subterranean_absorb_word((state), (x)); \ + } while (0) /** * \brief Performs a single Subterranean round and absorbs four bytes. * * \param state Subterranean state to be transformed. - * \param data Points to the four data bytes to be absorbed. + * \param data 32-bit word containing the four data bytes to be absorbed. */ #define subterranean_duplex_4(state, data) \ do { \ - subterranean_duplex_word((state), le_load_word32((data))); \ + subterranean_duplex_word((state), (data)); \ (state)->x[8] ^= 1; \ } while (0) diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.c b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.c index 1bc9fc4..aad147a 100644 --- a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.c +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.c @@ -75,8 +75,7 @@ int subterranean_aead_encrypt while (mlen >= 4) { x1 = le_load_word32(m); x2 = subterranean_extract(&state) ^ x1; - subterranean_duplex_word(&state, x1); - state.x[8] ^= 1; /* padding for 32-bit blocks */ + subterranean_duplex_4(&state, x1); le_store_word32(c, x2); c += 4; m += 4; @@ -146,8 +145,7 @@ int subterranean_aead_decrypt while (clen >= 4) { x = le_load_word32(c); x ^= subterranean_extract(&state); - subterranean_duplex_word(&state, x); - state.x[8] ^= 1; /* padding for 32-bit blocks */ + subterranean_duplex_4(&state, x); le_store_word32(m, x); c += 4; m += 4; diff --git a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.h b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.h index 148e5e8..3b35b42 100644 --- a/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.h +++ b/subterranean/Implementations/crypto_hash/subterraneanv1/rhys/subterranean.h @@ -38,6 +38,8 @@ * * The Subterranean permutation is intended for hardware implementation. * It is not structured for efficient software implementation. 
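+ *
+ * For 8-bit AVR targets this port supplies the core state operations in
+ * assembly instead (internal-subterranean-avr.S).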
+ * + * References: https://cs.ru.nl/~joan/subterranean.html */ #ifdef __cplusplus diff --git a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage-avr.S b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage-avr.S new file mode 100644 index 0000000..4b6c72f --- /dev/null +++ b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage-avr.S @@ -0,0 +1,1411 @@ +#if defined(__AVR__) +#include +/* Automatically generated - do not edit */ + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_0, @object + .size table_0, 256 +table_0: + .byte 46 + .byte 28 + .byte 109 + .byte 43 + .byte 53 + .byte 7 + .byte 127 + .byte 59 + .byte 40 + .byte 8 + .byte 11 + .byte 95 + .byte 49 + .byte 17 + .byte 27 + .byte 77 + .byte 110 + .byte 84 + .byte 13 + .byte 9 + .byte 31 + .byte 69 + .byte 117 + .byte 83 + .byte 106 + .byte 93 + .byte 97 + .byte 0 + .byte 4 + .byte 120 + .byte 6 + .byte 30 + .byte 55 + .byte 111 + .byte 47 + .byte 73 + .byte 100 + .byte 52 + .byte 125 + .byte 25 + .byte 57 + .byte 51 + .byte 67 + .byte 87 + .byte 96 + .byte 98 + .byte 19 + .byte 5 + .byte 119 + .byte 71 + .byte 79 + .byte 75 + .byte 29 + .byte 45 + .byte 36 + .byte 72 + .byte 116 + .byte 88 + .byte 37 + .byte 94 + .byte 90 + .byte 118 + .byte 65 + .byte 66 + .byte 39 + .byte 62 + .byte 108 + .byte 1 + .byte 44 + .byte 60 + .byte 78 + .byte 26 + .byte 33 + .byte 42 + .byte 10 + .byte 85 + .byte 58 + .byte 56 + .byte 24 + .byte 126 + .byte 12 + .byte 99 + .byte 103 + .byte 86 + .byte 80 + .byte 124 + .byte 50 + .byte 122 + .byte 104 + .byte 2 + .byte 107 + .byte 23 + .byte 123 + .byte 89 + .byte 113 + .byte 15 + .byte 48 + .byte 16 + .byte 34 + .byte 61 + .byte 64 + .byte 105 + .byte 82 + .byte 20 + .byte 54 + .byte 68 + .byte 70 + .byte 3 + .byte 22 + .byte 101 + .byte 102 + .byte 114 + .byte 18 + .byte 14 + .byte 41 + .byte 74 + .byte 76 + .byte 112 + .byte 21 + .byte 38 + .byte 121 + .byte 81 + .byte 35 + .byte 63 + .byte 115 + .byte 91 + .byte 32 + .byte 92 + .byte 0 + .byte 18 + .byte 10 + .byte 75 + .byte 102 + .byte 12 + .byte 72 + .byte 115 + .byte 121 + .byte 62 + .byte 97 + .byte 81 + .byte 1 + .byte 21 + .byte 23 + .byte 14 + .byte 126 + .byte 51 + .byte 104 + .byte 54 + .byte 66 + .byte 53 + .byte 55 + .byte 94 + .byte 83 + .byte 76 + .byte 63 + .byte 84 + .byte 88 + .byte 110 + .byte 86 + .byte 42 + .byte 29 + .byte 37 + .byte 109 + .byte 101 + .byte 91 + .byte 113 + .byte 47 + .byte 32 + .byte 6 + .byte 24 + .byte 41 + .byte 58 + .byte 13 + .byte 122 + .byte 108 + .byte 27 + .byte 25 + .byte 67 + .byte 112 + .byte 65 + .byte 73 + .byte 34 + .byte 119 + .byte 96 + .byte 79 + .byte 69 + .byte 85 + .byte 2 + .byte 99 + .byte 71 + .byte 117 + .byte 45 + .byte 64 + .byte 70 + .byte 125 + .byte 92 + .byte 124 + .byte 89 + .byte 38 + .byte 11 + .byte 9 + .byte 3 + .byte 87 + .byte 93 + .byte 39 + .byte 120 + .byte 48 + .byte 46 + .byte 68 + .byte 82 + .byte 59 + .byte 8 + .byte 103 + .byte 44 + .byte 5 + .byte 107 + .byte 43 + .byte 26 + .byte 33 + .byte 56 + .byte 7 + .byte 15 + .byte 74 + .byte 17 + .byte 80 + .byte 106 + .byte 40 + .byte 49 + .byte 16 + .byte 77 + .byte 95 + .byte 114 + .byte 57 + .byte 22 + .byte 90 + .byte 19 + .byte 4 + .byte 60 + .byte 52 + .byte 31 + .byte 118 + .byte 30 + .byte 20 + .byte 35 + .byte 28 + .byte 50 + .byte 78 + .byte 123 + .byte 36 + .byte 116 + .byte 127 + .byte 61 + .byte 105 + .byte 100 + .byte 98 + .byte 111 + + .section .progmem.data,"a",@progbits + .p2align 8 + .type table_1, @object + .size table_1, 222 +table_1: + .byte 127 + 
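+ ; remainder of table_1: the interleaved 7-bit WAGE round constants
+ ; (two constants per round for the 111 rounds, hence .size 222)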
.byte 63 + .byte 31 + .byte 15 + .byte 7 + .byte 3 + .byte 1 + .byte 64 + .byte 32 + .byte 16 + .byte 8 + .byte 4 + .byte 2 + .byte 65 + .byte 96 + .byte 48 + .byte 24 + .byte 12 + .byte 6 + .byte 67 + .byte 33 + .byte 80 + .byte 40 + .byte 20 + .byte 10 + .byte 69 + .byte 98 + .byte 113 + .byte 120 + .byte 60 + .byte 30 + .byte 79 + .byte 39 + .byte 19 + .byte 9 + .byte 68 + .byte 34 + .byte 81 + .byte 104 + .byte 52 + .byte 26 + .byte 77 + .byte 102 + .byte 115 + .byte 57 + .byte 92 + .byte 46 + .byte 87 + .byte 43 + .byte 21 + .byte 74 + .byte 101 + .byte 114 + .byte 121 + .byte 124 + .byte 62 + .byte 95 + .byte 47 + .byte 23 + .byte 11 + .byte 5 + .byte 66 + .byte 97 + .byte 112 + .byte 56 + .byte 28 + .byte 14 + .byte 71 + .byte 35 + .byte 17 + .byte 72 + .byte 36 + .byte 18 + .byte 73 + .byte 100 + .byte 50 + .byte 89 + .byte 108 + .byte 54 + .byte 91 + .byte 45 + .byte 86 + .byte 107 + .byte 53 + .byte 90 + .byte 109 + .byte 118 + .byte 123 + .byte 61 + .byte 94 + .byte 111 + .byte 55 + .byte 27 + .byte 13 + .byte 70 + .byte 99 + .byte 49 + .byte 88 + .byte 44 + .byte 22 + .byte 75 + .byte 37 + .byte 82 + .byte 105 + .byte 116 + .byte 58 + .byte 93 + .byte 110 + .byte 119 + .byte 59 + .byte 29 + .byte 78 + .byte 103 + .byte 51 + .byte 25 + .byte 76 + .byte 38 + .byte 83 + .byte 41 + .byte 84 + .byte 42 + .byte 85 + .byte 106 + .byte 117 + .byte 122 + .byte 125 + .byte 126 + .byte 127 + .byte 63 + .byte 31 + .byte 15 + .byte 7 + .byte 3 + .byte 1 + .byte 64 + .byte 32 + .byte 16 + .byte 8 + .byte 4 + .byte 2 + .byte 65 + .byte 96 + .byte 48 + .byte 24 + .byte 12 + .byte 6 + .byte 67 + .byte 33 + .byte 80 + .byte 40 + .byte 20 + .byte 10 + .byte 69 + .byte 98 + .byte 113 + .byte 120 + .byte 60 + .byte 30 + .byte 79 + .byte 39 + .byte 19 + .byte 9 + .byte 68 + .byte 34 + .byte 81 + .byte 104 + .byte 52 + .byte 26 + .byte 77 + .byte 102 + .byte 115 + .byte 57 + .byte 92 + .byte 46 + .byte 87 + .byte 43 + .byte 21 + .byte 74 + .byte 101 + .byte 114 + .byte 121 + .byte 124 + .byte 62 + .byte 95 + .byte 47 + .byte 23 + .byte 11 + .byte 5 + .byte 66 + .byte 97 + .byte 112 + .byte 56 + .byte 28 + .byte 14 + .byte 71 + .byte 35 + .byte 17 + .byte 72 + .byte 36 + .byte 18 + .byte 73 + .byte 100 + .byte 50 + .byte 89 + .byte 108 + .byte 54 + .byte 91 + .byte 45 + .byte 86 + .byte 107 + .byte 53 + .byte 90 + .byte 109 + .byte 118 + .byte 123 + .byte 61 + .byte 94 + .byte 111 + .byte 55 + .byte 27 + .byte 13 + .byte 70 + + .text +.global wage_permute + .type wage_permute, @function +wage_permute: + push r28 + push r29 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + movw r30,r24 + in r28,0x3d + in r29,0x3e + sbiw r28,37 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 +.L__stack_usage = 55 + ld r20,Z + ldd r21,Z+1 + ldd r22,Z+2 + std Y+1,r20 + std Y+2,r21 + std Y+3,r22 + ldd r20,Z+3 + ldd r21,Z+4 + ldd r22,Z+5 + std Y+4,r20 + std Y+5,r21 + std Y+6,r22 + ldd r20,Z+6 + ldd r21,Z+7 + ldd r22,Z+8 + std Y+7,r20 + std Y+8,r21 + std Y+9,r22 + ldd r20,Z+9 + ldd r21,Z+10 + ldd r22,Z+11 + std Y+10,r20 + std Y+11,r21 + std Y+12,r22 + ldd r20,Z+12 + ldd r21,Z+13 + ldd r22,Z+14 + std Y+13,r20 + std Y+14,r21 + std Y+15,r22 + ldd r20,Z+15 + ldd r21,Z+16 + ldd r22,Z+17 + std Y+16,r20 + std Y+17,r21 + std Y+18,r22 + ldd r20,Z+18 + ldd r21,Z+19 + ldd r22,Z+20 + std Y+19,r20 + std Y+20,r21 + std Y+21,r22 + ldd r20,Z+21 + ldd r21,Z+22 + ldd r22,Z+23 + std Y+22,r20 + std Y+23,r21 + 
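+ ; (prologue continues: the 37 seven-bit WAGE state cells are staged into
+ ; the stack frame at Y+1..Y+37, freeing Z for program-memory table lookups)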
std Y+24,r22 + ldd r20,Z+24 + ldd r21,Z+25 + ldd r22,Z+26 + std Y+25,r20 + std Y+26,r21 + std Y+27,r22 + ldd r20,Z+27 + ldd r21,Z+28 + ldd r22,Z+29 + std Y+28,r20 + std Y+29,r21 + std Y+30,r22 + ldd r20,Z+30 + ldd r21,Z+31 + ldd r22,Z+32 + std Y+31,r20 + std Y+32,r21 + std Y+33,r22 + ldd r20,Z+33 + ldd r21,Z+34 + ldd r22,Z+35 + std Y+34,r20 + std Y+35,r21 + std Y+36,r22 + ldd r20,Z+36 + std Y+37,r20 + push r31 + push r30 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r23,hh8(table_0) + in r0,_SFR_IO_ADDR(RAMPZ) + push r0 + out _SFR_IO_ADDR(RAMPZ),r23 +#endif + mov r19,r1 +78: + ldd r20,Y+1 + mov r18,r1 + lsr r20 + sbc r18,r1 + andi r18,120 + eor r20,r18 + ldd r23,Y+7 + eor r20,r23 + ldd r26,Y+9 + eor r20,r26 + ldd r27,Y+13 + eor r20,r27 + ldd r2,Y+14 + eor r20,r2 + ldd r3,Y+20 + eor r20,r3 + ldd r4,Y+25 + eor r20,r4 + ldd r5,Y+27 + eor r20,r5 + ldd r6,Y+31 + eor r20,r6 + ldd r7,Y+32 + eor r20,r7 + ldd r21,Y+2 + mov r18,r1 + lsr r21 + sbc r18,r1 + andi r18,120 + eor r21,r18 + ldd r8,Y+8 + eor r21,r8 + ldd r9,Y+10 + eor r21,r9 + eor r21,r2 + ldd r10,Y+15 + eor r21,r10 + ldd r11,Y+21 + eor r21,r11 + ldd r12,Y+26 + eor r21,r12 + ldd r13,Y+28 + eor r21,r13 + eor r21,r7 + ldd r14,Y+33 + eor r21,r14 + ldd r22,Y+3 + mov r18,r1 + lsr r22 + sbc r18,r1 + andi r18,120 + eor r22,r18 + eor r22,r26 + ldd r15,Y+11 + eor r22,r15 + eor r22,r10 + ldd r24,Y+16 + eor r22,r24 + ldd r25,Y+22 + eor r22,r25 + eor r22,r5 + ldd r16,Y+29 + eor r22,r16 + eor r22,r14 + ldd r17,Y+34 + eor r22,r17 + mov r30,r26 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r23,Y+6 + eor r23,r18 + mov r30,r9 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r27,Y+7 + eor r27,r18 + mov r30,r15 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r8,r18 + mov r30,r24 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r3,Y+12 + eor r3,r18 + ldd r4,Y+17 + mov r30,r4 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r6,Y+13 + eor r6,r18 + ldd r2,Y+18 + mov r30,r2 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r11,Y+14 + eor r11,r18 + mov r30,r13 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r12,Y+25 + eor r12,r18 + mov r30,r16 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r7,Y+26 + eor r7,r18 + ldd r10,Y+30 + mov r30,r10 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r5,r18 + ldd r25,Y+35 + mov r30,r25 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r14,Y+31 + eor r14,r18 + ldd r17,Y+36 + mov r30,r17 +#if defined(RAMPZ) + elpm r18,Z 
+#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r26,Y+32 + eor r26,r18 + ldd r9,Y+37 + mov r30,r9 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r15,Y+33 + eor r15,r18 + ldi r30,lo8(table_1) + ldi r31,hi8(table_1) +#if defined(RAMPZ) + ldi r24,hh8(table_1) + out _SFR_IO_ADDR(RAMPZ),r24 +#endif + mov r30,r19 +#if defined(RAMPZ) + elpm r2,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r2,Z +#elif defined(__AVR_TINY__) + ld r2,Z +#else + lpm + mov r2,r0 +#endif + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r20,r18 + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r4,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r4,Z +#elif defined(__AVR_TINY__) + ld r4,Z +#else + lpm + mov r4,r0 +#endif + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r21,r18 + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r24,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r24,Z +#elif defined(__AVR_TINY__) + ld r24,Z +#else + lpm + mov r24,r0 +#endif + inc r19 + mov r30,r19 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r22,r18 + inc r19 + ldi r30,lo8(table_0) + ldi r31,hi8(table_0) +#if defined(RAMPZ) + ldi r16,hh8(table_0) + out _SFR_IO_ADDR(RAMPZ),r16 +#endif + ldd r16,Y+19 + mov r30,r16 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r13,Y+20 + eor r13,r18 + eor r13,r2 + mov r30,r13 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r2,Y+21 + eor r2,r18 + eor r2,r4 + mov r30,r2 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + ldd r4,Y+22 + eor r4,r18 + eor r4,r24 + mov r30,r9 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r20,r18 + mov r30,r20 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r21,r18 + mov r30,r21 + ori r30,128 +#if defined(RAMPZ) + elpm r18,Z +#elif defined(__AVR_HAVE_LPMX__) + lpm r18,Z +#elif defined(__AVR_TINY__) + ld r18,Z +#else + lpm + mov r18,r0 +#endif + eor r22,r18 + ldd r18,Y+4 + std Y+1,r18 + ldd r18,Y+5 + std Y+2,r18 + std Y+3,r23 + std Y+4,r27 + std Y+5,r8 + ldd r18,Y+9 + std Y+6,r18 + ldd r18,Y+10 + std Y+7,r18 + ldd r18,Y+11 + std Y+8,r18 + std Y+9,r3 + std Y+10,r6 + std Y+11,r11 + ldd r18,Y+15 + std Y+12,r18 + ldd r18,Y+16 + std Y+13,r18 + ldd r18,Y+17 + std Y+14,r18 + ldd r18,Y+18 + std Y+15,r18 + std Y+16,r16 + std Y+17,r13 + std Y+18,r2 + std Y+19,r4 + ldd r18,Y+23 + std Y+20,r18 + ldd r18,Y+24 + std Y+21,r18 + std Y+22,r12 + std Y+23,r7 + std Y+24,r5 + ldd r18,Y+28 + std Y+25,r18 + ldd r18,Y+29 + std Y+26,r18 + std Y+27,r10 + 
std Y+28,r14 + std Y+29,r26 + std Y+30,r15 + ldd r18,Y+34 + std Y+31,r18 + std Y+32,r25 + std Y+33,r17 + std Y+34,r9 + std Y+35,r20 + std Y+36,r21 + std Y+37,r22 + ldi r27,222 + cpse r19,r27 + rjmp 78b +#if defined(RAMPZ) + pop r0 + out _SFR_IO_ADDR(RAMPZ),r0 +#endif + pop r30 + pop r31 + ldd r20,Y+1 + ldd r21,Y+2 + ldd r22,Y+3 + st Z,r20 + std Z+1,r21 + std Z+2,r22 + ldd r20,Y+4 + ldd r21,Y+5 + ldd r22,Y+6 + std Z+3,r20 + std Z+4,r21 + std Z+5,r22 + ldd r20,Y+7 + ldd r21,Y+8 + ldd r22,Y+9 + std Z+6,r20 + std Z+7,r21 + std Z+8,r22 + ldd r20,Y+10 + ldd r21,Y+11 + ldd r22,Y+12 + std Z+9,r20 + std Z+10,r21 + std Z+11,r22 + ldd r20,Y+13 + ldd r21,Y+14 + ldd r22,Y+15 + std Z+12,r20 + std Z+13,r21 + std Z+14,r22 + ldd r20,Y+16 + ldd r21,Y+17 + ldd r22,Y+18 + std Z+15,r20 + std Z+16,r21 + std Z+17,r22 + ldd r20,Y+19 + ldd r21,Y+20 + ldd r22,Y+21 + std Z+18,r20 + std Z+19,r21 + std Z+20,r22 + ldd r20,Y+22 + ldd r21,Y+23 + ldd r22,Y+24 + std Z+21,r20 + std Z+22,r21 + std Z+23,r22 + ldd r20,Y+25 + ldd r21,Y+26 + ldd r22,Y+27 + std Z+24,r20 + std Z+25,r21 + std Z+26,r22 + ldd r20,Y+28 + ldd r21,Y+29 + ldd r22,Y+30 + std Z+27,r20 + std Z+28,r21 + std Z+29,r22 + ldd r20,Y+31 + ldd r21,Y+32 + ldd r22,Y+33 + std Z+30,r20 + std Z+31,r21 + std Z+32,r22 + ldd r20,Y+34 + ldd r21,Y+35 + ldd r22,Y+36 + std Z+33,r20 + std Z+34,r21 + std Z+35,r22 + ldd r20,Y+37 + std Z+36,r20 + adiw r28,37 + in r0,0x3f + cli + out 0x3e,r29 + out 0x3f,r0 + out 0x3d,r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r29 + pop r28 + ret + .size wage_permute, .-wage_permute + + .text +.global wage_absorb + .type wage_absorb, @function +wage_absorb: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r22,X+ + ld r21,X+ + ld r20,X+ + ld r19,X+ + mov r18,r1 + lsr r22 + ror r21 + ror r20 + ror r19 + ror r18 + ldd r0,Z+8 + eor r0,r22 + std Z+8,r0 + lsr r21 + ror r20 + ror r19 + ror r18 + ldd r0,Z+9 + eor r0,r21 + std Z+9,r0 + lsr r20 + ror r19 + ror r18 + ldd r0,Z+15 + eor r0,r20 + std Z+15,r0 + lsr r19 + ror r18 + ldd r0,Z+16 + eor r0,r19 + std Z+16,r0 + lsr r18 + ldd r0,Z+18 + eor r0,r18 + std Z+18,r0 + ld r22,X+ + ld r21,X+ + ld r20,X+ + ld r19,X+ + mov r23,r1 + mov r18,r1 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + ldd r0,Z+18 + eor r0,r23 + std Z+18,r0 + lsr r22 + ror r21 + ror r20 + ror r19 + ldd r0,Z+27 + eor r0,r22 + std Z+27,r0 + lsr r21 + ror r20 + ror r19 + ldd r0,Z+28 + eor r0,r21 + std Z+28,r0 + lsr r20 + ror r19 + ldd r0,Z+34 + eor r0,r20 + std Z+34,r0 + lsr r19 + ror r18 + ldd r0,Z+35 + eor r0,r19 + std Z+35,r0 + lsr r18 + ldd r0,Z+36 + eor r0,r18 + std Z+36,r0 + ret + .size wage_absorb, .-wage_absorb + + .text +.global wage_get_rate + .type wage_get_rate, @function +wage_get_rate: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ldd r21,Z+8 + ldd r20,Z+9 + ldd r19,Z+15 + ldd r18,Z+16 + lsl r18 + lsl r18 + rol r19 + lsl r18 + rol r19 + rol r20 + lsl r18 + rol r19 + rol r20 + rol r21 + ldd r22,Z+18 + lsr r22 + lsr r22 + lsr r22 + or r18,r22 + st X+,r21 + st X+,r20 + st X+,r19 + st X+,r18 + ldd r21,Z+18 + ldd r20,Z+27 + ldd r19,Z+28 + ldd r18,Z+34 + lsl r18 + lsl r18 + rol r19 + lsl r18 + rol r19 + rol r20 + lsr r21 + ror r20 + ror r19 + ror r18 + lsr r21 + ror r20 + ror r19 + ror r18 + lsr r21 + ror r20 + ror r19 + ror r18 + st X+,r20 + st X+,r19 + st X+,r18 + ldd r19,Z+35 + ldd r18,Z+36 + lsl r18 + 
lsl r18 + rol r19 + st X+,r19 + ret + .size wage_get_rate, .-wage_get_rate + + .text +.global wage_set_rate + .type wage_set_rate, @function +wage_set_rate: + movw r30,r24 + movw r26,r22 +.L__stack_usage = 2 + ld r22,X+ + ld r21,X+ + ld r20,X+ + ld r19,X+ + mov r18,r1 + lsr r22 + ror r21 + ror r20 + ror r19 + ror r18 + std Z+8,r22 + lsr r21 + ror r20 + ror r19 + ror r18 + std Z+9,r21 + lsr r20 + ror r19 + ror r18 + std Z+15,r20 + lsr r19 + ror r18 + std Z+16,r19 + lsr r18 + std Z+18,r18 + ld r22,X+ + ld r21,X+ + ld r20,X+ + ld r19,X+ + mov r23,r1 + mov r18,r1 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + ldd r0,Z+18 + eor r0,r23 + std Z+18,r0 + lsr r22 + ror r21 + ror r20 + ror r19 + std Z+27,r22 + lsr r21 + ror r20 + ror r19 + std Z+28,r21 + lsr r20 + ror r19 + std Z+34,r20 + lsr r19 + ror r18 + std Z+35,r19 + lsr r18 + ldd r24,Z+36 + andi r24,63 + eor r24,r18 + std Z+36,r24 + ret + .size wage_set_rate, .-wage_set_rate + +#endif diff --git a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.c b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.c index e9528c9..6741643 100644 --- a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.c +++ b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.c @@ -33,6 +33,8 @@ */ #define WAGE_64BIT 1 +#if !defined(__AVR__) + /** * \brief RC0 and RC1 round constants for WAGE, interleaved with each other. */ @@ -287,8 +289,7 @@ void wage_permute(unsigned char s[WAGE_STATE_SIZE]) /* 7-bit components for the rate: 8, 9, 15, 16, 18, 27, 28, 34, 35, 36 */ void wage_absorb - (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8], - unsigned char domain) + (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8]) { uint32_t temp; temp = be_load_word32(data); @@ -304,7 +305,6 @@ void wage_absorb s[34] ^= (unsigned char)((temp >> 8) & 0x7F); s[35] ^= (unsigned char)((temp >> 1) & 0x7F); s[36] ^= (unsigned char)((temp << 6) & 0x7F); - s[0] ^= domain; } void wage_get_rate @@ -327,8 +327,7 @@ void wage_get_rate } void wage_set_rate - (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8], - unsigned char domain) + (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8]) { uint32_t temp; temp = be_load_word32(data); @@ -344,9 +343,10 @@ void wage_set_rate s[34] = (unsigned char)((temp >> 8) & 0x7F); s[35] = (unsigned char)((temp >> 1) & 0x7F); s[36] = (unsigned char)(((temp << 6) & 0x40) ^ (s[36] & 0x3F)); - s[0] ^= domain; } +#endif /* !__AVR__ */ + /** * \brief Converts a 128-bit value into an array of 7-bit components. * diff --git a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.h b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.h index a0d23d7..2663e72 100644 --- a/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.h +++ b/wage/Implementations/crypto_aead/wageae128v1/rhys/internal-wage.h @@ -55,11 +55,9 @@ void wage_permute(unsigned char s[WAGE_STATE_SIZE]); * * \param s The WAGE state to be permuted. * \param data The data to be absorbed. - * \param domain The domain separator for the absorbed data. */ void wage_absorb - (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8], - unsigned char domain); + (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8]); /** * \brief Gets the 8 bytes of the rate from the WAGE state. @@ -75,11 +73,9 @@ void wage_get_rate * * \param s The WAGE state to set the rate in. 
 * \param data Points to the bytes to set into the rate.
- * \param domain The domain separator for the rate data.
  */
 void wage_set_rate
-    (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8],
-     unsigned char domain);
+    (unsigned char s[WAGE_STATE_SIZE], const unsigned char data[8]);
 
 /**
  * \brief Absorbs 16 key bytes into the WAGE state.
diff --git a/wage/Implementations/crypto_aead/wageae128v1/rhys/wage.c b/wage/Implementations/crypto_aead/wageae128v1/rhys/wage.c
index 374409b..bede1c0 100644
--- a/wage/Implementations/crypto_aead/wageae128v1/rhys/wage.c
+++ b/wage/Implementations/crypto_aead/wageae128v1/rhys/wage.c
@@ -55,7 +55,8 @@ static void wage_process_ad
 
     /* Process as many full blocks as possible */
     while (adlen >= WAGE_RATE) {
-        wage_absorb(state, ad, 0x40);
+        wage_absorb(state, ad);
+        state[0] ^= 0x40;
         wage_permute(state);
         ad += WAGE_RATE;
         adlen -= WAGE_RATE;
@@ -66,7 +67,8 @@
     memcpy(pad, ad, temp);
     pad[temp] = 0x80;
     memset(pad + temp + 1, 0, WAGE_RATE - temp - 1);
-    wage_absorb(state, pad, 0x40);
+    wage_absorb(state, pad);
+    state[0] ^= 0x40;
     wage_permute(state);
 }
 
@@ -95,7 +97,8 @@ int wage_aead_encrypt
     while (mlen >= WAGE_RATE) {
         wage_get_rate(state, block);
         lw_xor_block(block, m, WAGE_RATE);
-        wage_set_rate(state, block, 0x20);
+        wage_set_rate(state, block);
+        state[0] ^= 0x20;
         wage_permute(state);
         memcpy(c, block, WAGE_RATE);
         c += WAGE_RATE;
@@ -106,7 +109,8 @@
     wage_get_rate(state, block);
     lw_xor_block(block, m, temp);
     block[temp] ^= 0x80;
-    wage_set_rate(state, block, 0x20);
+    wage_set_rate(state, block);
+    state[0] ^= 0x20;
     wage_permute(state);
     memcpy(c, block, temp);
@@ -145,7 +149,8 @@ int wage_aead_decrypt
     while (clen >= WAGE_RATE) {
         wage_get_rate(state, block);
         lw_xor_block(block, c, WAGE_RATE);
-        wage_set_rate(state, c, 0x20);
+        wage_set_rate(state, c);
+        state[0] ^= 0x20;
         wage_permute(state);
         memcpy(m, block, WAGE_RATE);
         c += WAGE_RATE;
@@ -157,7 +162,8 @@
     lw_xor_block_2_src(block + 8, block, c, temp);
     memcpy(block, c, temp);
     block[temp] ^= 0x80;
-    wage_set_rate(state, block, 0x20);
+    wage_set_rate(state, block);
+    state[0] ^= 0x20;
     wage_permute(state);
     memcpy(m, block + 8, temp);
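
The wage.c hunks above all apply the same refactor: the domain-separator
argument is dropped from wage_absorb()/wage_set_rate(), and each caller now
XORs the constant (0x40 for associated-data blocks, 0x20 for message blocks)
into state[0] itself before permuting, which keeps the new AVR assembly entry
points down to a plain (state, data) signature. Below is a minimal sketch of
that calling pattern only; toy_absorb() and toy_permute() are hypothetical
stand-ins, not the real WAGE primitives, which spread the 8 rate bytes across
the 7-bit state components 8, 9, 15, 16, 18, 27, 28, 34, 35 and 36.

/* Sketch of caller-side domain separation; not the real WAGE code. */

#define TOY_STATE_SIZE 37   /* WAGE's state is 37 bytes (Z+0..Z+36 above) */
#define TOY_RATE 8

/* Hypothetical stand-in for wage_absorb(): XOR the block into the state.
 * The real function packs the 64-bit rate into 7-bit components. */
static void toy_absorb(unsigned char s[TOY_STATE_SIZE],
                       const unsigned char data[TOY_RATE])
{
    int i;
    for (i = 0; i < TOY_RATE; ++i)
        s[i + 1] ^= data[i];
}

/* Hypothetical stand-in for wage_permute(): a no-op placeholder. */
static void toy_permute(unsigned char s[TOY_STATE_SIZE])
{
    (void)s;
}

/* Old shape: the absorb primitive took a third "domain" argument and
 * applied it internally.  New shape: the caller applies it to s[0] after
 * absorbing, exactly as wage_process_ad() and wage_aead_encrypt() now do. */
static void toy_process_block(unsigned char s[TOY_STATE_SIZE],
                              const unsigned char block[TOY_RATE],
                              unsigned char domain)
{
    toy_absorb(s, block);
    s[0] ^= domain;             /* 0x40 for AD, 0x20 for payload blocks */
    toy_permute(s);
}

int main(void)
{
    unsigned char state[TOY_STATE_SIZE] = {0};
    static const unsigned char block[TOY_RATE] = {1, 2, 3, 4, 5, 6, 7, 8};
    toy_process_block(state, block, 0x40);  /* absorb one AD block */
    return state[0] == 0x40 ? 0 : 1;        /* separator landed in s[0] */
}

A plausible motivation for the split: the .S file is machine-generated, so
keeping the mode-specific domain constants in the portable C callers means
the assembly never has to be regenerated when a mode changes its separator.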